Example No. 1
def _createExternalStore():
    import uuid
    from azure.storage.blob import BlobService
    from toil.jobStores.azureJobStore import _fetchAzureAccountKey

    blobService = BlobService(account_key=_fetchAzureAccountKey(AzureJobStoreTest.accountName),
                              account_name=AzureJobStoreTest.accountName)
    containerName = 'import-export-test-%s' % uuid.uuid4()
    blobService.create_container(containerName)
    return containerName
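Since this helper only provisions a throwaway test container, a matching teardown is usually wanted; a minimal sketch, assuming the same account fixture (the helper name is hypothetical):

def _deleteExternalStore(containerName):
    from azure.storage.blob import BlobService
    from toil.jobStores.azureJobStore import _fetchAzureAccountKey
    blobService = BlobService(account_key=_fetchAzureAccountKey(AzureJobStoreTest.accountName),
                              account_name=AzureJobStoreTest.accountName)
    # fail_not_exist=False (the default) makes repeated teardown harmless
    blobService.delete_container(containerName)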
Example No. 2
	def __create_blob_container(self, storage_acc_name):
		sms = self.__get_service_mgmt_object()

		# Retrieve the primary key of your storage account
		# Maybe the secondary key works too?
		storage_acc_key = None
		accounts = sms.list_storage_accounts()
		for account in accounts:
		    if account.service_name == storage_acc_name:
		        storageServiceObj = sms.get_storage_account_keys(account.service_name)
		        storage_acc_key = storageServiceObj.storage_service_keys.primary


		# Create a container
		blob_service = BlobService(account_name=storage_acc_name,
		                           account_key=storage_acc_key)

		container_name = namesgenerator.get_random_name()
		container_name += "container"
		blob_service.create_container(container_name)

		# This is the url to the container we just created
		container_url_template = "http://{}.blob.core.windows.net/{}"
		container_url = container_url_template.format(storage_acc_name, container_name)
		#print "Created blob container with URL ",container_url
		return container_url
Example No. 3
def StateThread():

    global initializeNow
    global isPrimary
    while 1:
        try:
            currentHost = socket.gethostname()
            blob_service = BlobService(account_name=azureStorageAccountName, account_key=azureStorageAccountKey)
            if initializeNow:
                initializeNow = False
                print("Initializing '" + currentHost + "' as primary.")
                newContents = currentHost
                blob_service.create_container(container)
                blob_service.put_block_blob_from_text(container, blob, newContents)

            while 1:
                print("Downloading current state.")
                currentContents = blob_service.get_blob_to_text(container, blob)
                if currentContents == currentHost:
                    isPrimary = True
                    print("isPrimary = True")
                    # we have now received status; if the second thread is NOT running, start it
                    if not t2.isAlive():
                        t2.start()
                elif currentContents != currentHost and len(currentContents) > 0:
                    isPrimary = False
                    print("isPrimary = False")
                    # we have now received status; if the second thread is NOT running, start it
                    if not t2.isAlive():
                        t2.start()
                sleep(.1)
        except Exception as e:
            print("Error in MainStateThread: " + str(e))
Example No. 4
    def __get_available_storage_account_and_container(self, hackathon_id):
        """
        Get available storage account and container

        :param hackathon_id: the id of hackathon
        :type hackathon_id: integer

        :return: if there is available storage account and container, then return (True, storage
                 account name, container name). Otherwise, return (False, None, None)
        :rtype: 3-element tuple: (bool, str|unicode, str|unicode)
        """
        container_name = self.util.safe_get_config(
            'dockerhostserver.azure.container', 'dockerhostprivatecontainer')
        sms = self.__get_sms_object(hackathon_id)
        if sms is None:
            self.log.error(
                'Something wrong with Azure account of Hackathon:%d' %
                hackathon_id)
            return False, None, None
        storage_accounts = sms.list_storage_accounts()
        # check storage accounts one by one; return as soon as a qualified one is found
        for storage in storage_accounts.storage_services:
            try:
                storage_response = sms.get_storage_account_keys(
                    storage.service_name)
            except Exception as e:
                self.log.error(
                    'Encounter an error when checking storage_account:%s ' %
                    storage.service_name)
                self.log.error(e)
                continue
            blob_service = BlobService(
                account_name=storage.service_name,
                account_key=storage_response.storage_service_keys.primary,
                host_base=self.util.safe_get_config(
                    'dockerhostserver.storage.host_base',
                    '.blob.core.chinacloudapi.cn'))
            try:
                blob_service.get_container_metadata(container_name)
                return True, storage.service_name, container_name
            except Exception as e:
                if e.message != AzureApiExceptionMessage.CONTAINER_NOT_FOUND:
                    self.log.error(
                        'Encounter an error when checking container:%s ' %
                        container_name)
                    self.log.error(e)
                    continue
            try:
                blob_service.create_container(
                    container_name=container_name,
                    x_ms_blob_public_access='container')
                return True, storage.service_name, container_name
            except Exception as e:
                self.log.error(
                    'Encounter an error when creating container:%s ' %
                    container_name)
                self.log.error(e)
        return False, None, None
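A hypothetical call site inside the same class, unpacking the documented 3-tuple:

ok, account_name, container_name = self.__get_available_storage_account_and_container(hackathon_id)
if not ok:
    self.log.error('no usable storage account/container for hackathon %d' % hackathon_id)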
Example No. 5
    def _createExternalStore(self):
        import uuid
        from toil.jobStores.azureJobStore import _fetchAzureAccountKey
        from azure.storage.blob import BlobService

        blobService = BlobService(account_key=_fetchAzureAccountKey(self.accountName),
                                  account_name=self.accountName)
        containerName = 'import-export-test-%s' % uuid.uuid4()
        blobService.create_container(containerName)
        return containerName
Example No. 7
class AzureStorage(Storage):
    account_name = settings.AZURE_ACCOUNT_NAME
    account_key = settings.AZURE_ACCOUNT_KEY
    azure_container = settings.AZURE_CONTAINER

    def __init__(self, *args, **kwargs):
        super(AzureStorage, self).__init__(*args, **kwargs)
        self._connection = None

    @property
    def connection(self):
        if self._connection is None:
            # Create connection
            self._connection = BlobService(self.account_name, self.account_key)
            
            # Create container if needed
            containers = [c for c in self._connection.list_containers(prefix=self.azure_container) if c.name == self.azure_container ]
            if len(containers) == 0:
                self._connection.create_container(self.azure_container, {'origin': 'created by Django web app'}, fail_on_exist=True)

        return self._connection

    def _open(self, name, mode="rb"):
        stream = SimpleUploadedFile(name, None)
        self.connection.get_blob_to_file(self.azure_container, name, stream)
        stream.seek(0)
        return stream

    def exists(self, name):
        try:
            self.connection.get_blob_properties(self.azure_container, name)
        except AzureMissingResourceHttpError:
            return False
        else:
            return True

    def delete(self, name):
        self.connection.delete_blob(self.azure_container, name)

    def size(self, name):
        properties = self.connection.get_blob_properties(self.azure_container, name)
        # the legacy SDK returns header values as strings; Django expects an int
        return int(properties["content-length"])

    def _save(self, name, content):
        self.connection.put_block_blob_from_file(self.azure_container, name, content)
        return name

    def url(self, name):
        ap = AccessPolicy(expiry=(timezone.datetime.utcnow() + timezone.timedelta(seconds=600)).strftime('%Y-%m-%dT%H:%M:%SZ'),
                          start=(timezone.datetime.utcnow() + timezone.timedelta(seconds=-600)).strftime('%Y-%m-%dT%H:%M:%SZ'),
                          permission='r')
        sap = SharedAccessPolicy(ap)
        sas = SharedAccessSignature(self.account_name, self.account_key)
        url = sas.generate_signed_query_string(path=self.azure_container + '/' + name, resource_type='b', shared_access_policy=sap)

        return self.connection.make_blob_url(self.azure_container, name) + "?" + url
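Wiring the backend into a project follows the usual Django pattern; a sketch (the `myapp.storage` module path is an assumption):

# settings.py (values are placeholders)
AZURE_ACCOUNT_NAME = 'myaccount'
AZURE_ACCOUNT_KEY = '<base64-key>'
AZURE_CONTAINER = 'media'
DEFAULT_FILE_STORAGE = 'myapp.storage.AzureStorage'  # assumed module path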
Example No. 8
class _BlobStorageFileHandler(object):
    def __init__(self,
                 account_name=None,
                 account_key=None,
                 protocol='https',
                 container='logs',
                 zip_compression=False,
                 max_connections=1,
                 max_retries=5,
                 retry_wait=1.0):
        self.service = BlobService(account_name, account_key, protocol)
        self.container_created = False
        hostname = gethostname()
        self.meta = {
            'hostname': hostname.replace('_', '-'),
            'process': os.getpid()
        }
        self.container = (container % self.meta).lower()
        self.meta['hostname'] = hostname
        self.zip_compression = zip_compression
        self.max_connections = max_connections
        self.max_retries = max_retries
        self.retry_wait = retry_wait

    def put_file_into_storage(self, dirName, fileName):
        """
        Ship the outdated log file to the specified blob container.
        """
        if not self.container_created:
            self.service.create_container(self.container)
            self.container_created = True
        fd, tmpfile_path = None, ''
        try:
            file_path = os.path.join(dirName, fileName)
            if self.zip_compression:
                suffix, content_type = '.zip', 'application/zip'
                fd, tmpfile_path = mkstemp(suffix=suffix)
                with os.fdopen(fd, 'wb') as f:
                    with ZipFile(f, 'w', ZIP_DEFLATED) as z:
                        z.write(file_path, arcname=fileName)
                file_path = tmpfile_path
            else:
                suffix, content_type = '', 'text/plain'
            self.service.put_block_blob_from_path(
                self.container,
                fileName + suffix,
                file_path,
                x_ms_blob_content_type=content_type,
                max_connections=self.max_connections,
                max_retries=self.max_retries,
                retry_wait=self.retry_wait)
        finally:
            if self.zip_compression and fd:
                os.remove(tmpfile_path)
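A direct, hypothetical use of the shipping method; in practice a rotating log handler built on this class would call it when a file rolls over:

handler = _BlobStorageFileHandler(account_name='myaccount',  # placeholder
                                  account_key='<key>',       # placeholder
                                  zip_compression=True)
# Uploads /var/log/myapp/app.log as app.log.zip to the 'logs' container
handler.put_file_into_storage('/var/log/myapp', 'app.log')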
Example No. 9
class azure_storage_writer(object):
    """storage operation wrapper, designed for writing logs to storage"""

    def __init__(self, account_name, account_key, container, prefix):
        self._blob = BlobService(account_name=account_name, account_key=account_key)
        self._cur_path = None
        self._buf = io.StringIO()
        self._prefix = prefix
        self._container = container
        self._blob.create_container(container)
        self._logger = create_timed_rotating_log()

    def write_log(self, entity):
        path = self._get_path(entity[0])
        if self._cur_path is None:
            self._cur_path = path
        elif self._cur_path != path:
            self._dump_buf_to_storage()
            self._buf.close()
            self._buf = io.StringIO()
            self._cur_path = path
        self._buf.write(entity[1])
        self._buf.write("\n")

    def close(self):
        if self._cur_path is not None:
            self._dump_buf_to_storage()
            self._buf.close()

    def _dump_buf_to_storage(self):
        self._logger.info("Begin dump to azure blob")
        loop = 0
        while True:
            try:
                self._blob.put_block_blob_from_text(self._container,self._cur_path, self._buf.getvalue())
                break
            except AzureHttpError as e:
                self._logger.warning("Hit an AzureHttpError " + str(e))
                self._logger.warning("Retry times: {0}".format(loop))
                loop += 1
                if loop >= 3:
                    raise
            except Exception as e:
                self._logger.warning("Hit an Exception " + str(e))
                raise
        self._logger.info("Dump to azure blob succeeded.")

    def _get_path(self, timestamp):
        d = datetime.fromtimestamp(int(timestamp))
        part = "logs-part-{}.txt".format(d.minute // 5)
        path_str = d.strftime('%Y-%m-%d/%H')
        return "{}/{}/{}".format(self._prefix, path_str, part)
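Usage sketch, assuming entities are (unix-timestamp, text) pairs as `write_log`/`_get_path` expect and that `create_timed_rotating_log` is defined elsewhere in the module:

writer = azure_storage_writer('myaccount', '<key>', 'logs', 'myapp')  # placeholders
writer.write_log((1457000000, u'first line'))
writer.write_log((1457000100, u'second line'))
writer.close()  # flush the buffered lines for the final 5-minute window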
Example No. 11
def main():
    service = BlobService(settings.STORAGE_ACCOUNT_NAME,
                          settings.STORAGE_ACCOUNT_KEY)
    service.create_container(CONTAINER_NAME)

    process(service,
            LOCAL_BLOCK_BLOB_FILES,
            CONNECTION_COUNTS,
            is_page_blob=False)
    process(service,
            LOCAL_PAGE_BLOB_FILES,
            CONNECTION_COUNTS,
            is_page_blob=True)
Example No. 12
def prepare_storage(settings):
    default_storage_account_name = settings["DEFAULT_STORAGE_ACCOUNT_NAME"]
    storage_access_key = settings["STORAGE_ACCESS_KEY"]

    blob_service = BlobService(default_storage_account_name,
                               storage_access_key)
    blob_service.create_container('bosh')
    blob_service.create_container(container_name='stemcell',
                                  x_ms_blob_public_access='blob')

    # Prepare the table for storing metadata of storage accounts and stemcells
    table_service = TableService(default_storage_account_name,
                                 storage_access_key)
    table_service.create_table('stemcells')
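Called with a plain settings mapping, e.g. (values are placeholders):

prepare_storage({
    "DEFAULT_STORAGE_ACCOUNT_NAME": "mydefaultstorage",
    "STORAGE_ACCESS_KEY": "<base64-key>",
})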
Example No. 14
import pandas as pd
from pyspark import SparkContext
from pyspark.sql import SQLContext
from pyspark.ml import PipelineModel
from pyspark.ml.classification import RandomForestClassificationModel
from pyspark.ml.feature import VectorAssembler
from azure.storage.blob import BlobService

# Note: get_most_recent_date and get_df_from_blob are helpers defined
# elsewhere in the original script.
def main(account_name, account_key):
    sc = SparkContext()
    sqlContext = SQLContext(sc)

    patient_records_container = 'patientrecords'
    glucose_levels_container = 'glucoselevelsaggs'
    preds_container = 'predictions'

    blob_service = BlobService(account_name=account_name, account_key=account_key)
    blob_service.create_container(preds_container)
    
    day_to_predict = get_most_recent_date(blob_service, glucose_levels_container)
    df = get_df_from_blob(blob_service, glucose_levels_container, patient_records_container, day_to_predict)
    
    project_path = 'wasb://model@{}.blob.core.windows.net/{}'
    si_pipe_model = PipelineModel.read().load(path=project_path.format(account_name, 'si_pipe_model'))
    oh_pipe_model = PipelineModel.read().load(path=project_path.format(account_name, 'oh_pipe_model'))
    model = RandomForestClassificationModel.read().load(path=project_path.format(account_name, 'model'))
    
    df_spark = sqlContext.createDataFrame(df)
    df_preds = si_pipe_model.transform(df_spark)
    df_preds = oh_pipe_model.transform(df_preds)
    
    num_var_names = ['time_in_hospital', 'num_lab_procedures', 'num_procedures', 'num_medications', 'number_outpatient',
                     'number_emergency', 'number_inpatient', 'diag_1', 'diag_2', 'diag_3', 'number_diagnoses', 'glucose_min',
                     'glucose_max', 'glucose_mean', 'glucose_var']
    cat_var_names = ['race', 'gender', 'age', 'weight', 'admission_type_id', 'discharge_disposition_id',
                     'admission_source_id', 'payer_code', 'medical_specialty', 'max_glu_serum', 'A1Cresult', 'metformin',
                     'repaglinide', 'nateglinide', 'chlorpropamide', 'glimepiride', 'acetohexamide', 'glipizide', 'glyburide',
                     'tolbutamide', 'pioglitazone', 'rosiglitazone', 'acarbose', 'miglitol', 'troglitazone', 'tolazamide',
                     'insulin', 'glyburide-metformin', 'glipizide-metformin', 'glimepiride-pioglitazone',
                     'metformin-rosiglitazone', 'metformin-pioglitazone', 'change', 'diabetesMed', 'diag_1_missing',
                     'diag_2_missing', 'diag_3_missing', 'race_missing', 'weight_missing', 'payer_code_missing',
                     'medical_specialty_missing']
    va = VectorAssembler(inputCols=(num_var_names + [c + "__encoded__" for c in cat_var_names]), outputCol='features')
    df_preds = va.transform(df_preds).select('features')
    
    df_preds = model.transform(df_preds)
    df_preds_pandas = df_preds.toPandas()
    df_preds_pandas = pd.concat([df[['patient_nbr', 'discharge_date']],
                                 df_preds_pandas['probability'].map(lambda x: x[1])], axis=1)
    
    # Save the predictions
    blob_service.put_block_blob_from_text(blob_name='-'.join(str(day_to_predict).split('/')) + '.csv',
                                          container_name=preds_container,
                                          text=df_preds_pandas.to_csv(index=False))
    return
Example No. 15
def connect(config=False):
  import lib.misc as misc 
  from azure.storage.blob import BlobService
  global blob_service, container
  # Connect to the cloud service. 
  if not config: config = misc.config['_private']

  container = 'streams'

  if 'azure' not in config:
    return None, None

  if not blob_service:
    blob_service = BlobService(config['azure']['storage_account_name'], config['azure']['primary_access_key'])
    blob_service.create_container(container, x_ms_blob_public_access='container')

  return blob_service, container
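Typical call site, assuming the module initializes `blob_service = None` at import time:

blob_service, container = connect()
if blob_service is not None:
    # sketch: store a small text blob in the shared 'streams' container
    blob_service.put_block_blob_from_text(container, 'hello.txt', 'hello')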
Example No. 17
class BlobSaver(Saver):
    def __init__(self, account, key, container, prefix):
        self.block_blob_service = BlobService(account_name=account,
                                              account_key=key)
        self.container = container
        self.prefix = prefix
        self.block_blob_service.create_container(self.container)

    def send_data(self, name, data):
        counter = BLOB_RETRIES
        last_exc = None
        while counter:
            try:
                self.block_blob_service.put_block_blob_from_bytes(
                    self.container, os.path.join(self.prefix, name), data)
            except AzureException as azure_exc:
                # keep a reference; on Python 3 the except variable goes out of scope
                last_exc = azure_exc
                counter -= 1
            else:
                return
        raise RuntimeError("Couldn't send to blob: %s" % (last_exc.args[0],))
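Usage sketch (`BLOB_RETRIES` is a module-level constant in the original source; credentials are placeholders):

saver = BlobSaver('myaccount', '<key>', 'backups', 'daily')
saver.send_data('dump.bin', b'\x00\x01\x02')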
Example No. 18
class AzureJobStore(AbstractJobStore):
    """
    A job store that uses Azure's blob store for file storage and Table Service to store job info
    with strong consistency.
    """

    # Dots in container names should be avoided because container names are used in HTTPS bucket
    # URLs where they may interfere with the certificate common name. We use a double underscore
    # as a separator instead.
    #
    containerNameRe = re.compile(r'^[a-z0-9](-?[a-z0-9]+)+[a-z0-9]$')

    # See https://msdn.microsoft.com/en-us/library/azure/dd135715.aspx
    #
    minContainerNameLen = 3
    maxContainerNameLen = 63
    maxNameLen = 10
    nameSeparator = 'xx'  # Table names must be alphanumeric
    # Length of a jobID - used to test if a stats file has been read already or not
    jobIDLength = len(str(uuid.uuid4()))

    def __init__(self, locator, jobChunkSize=maxAzureTablePropertySize):
        super(AzureJobStore, self).__init__()
        accountName, namePrefix = locator.split(':', 1)
        if '--' in namePrefix:
            raise ValueError("Invalid name prefix '%s'. Name prefixes may not contain '--'."
                             % namePrefix)
        if not self.containerNameRe.match(namePrefix):
            raise ValueError("Invalid name prefix '%s'. Name prefixes must contain only digits, "
                             "hyphens or lower-case letters and must not start or end in a "
                             "hyphen." % namePrefix)
        # Reserve 13 for separator and suffix
        if len(namePrefix) > self.maxContainerNameLen - self.maxNameLen - len(self.nameSeparator):
            raise ValueError(("Invalid name prefix '%s'. Name prefixes may not be longer than 50 "
                              "characters." % namePrefix))
        self.locator = locator
        self.jobChunkSize = jobChunkSize
        self.accountKey = _fetchAzureAccountKey(accountName)
        self.accountName = accountName
        # Table names have strict requirements in Azure
        self.namePrefix = self._sanitizeTableName(namePrefix)
        # These are the main API entry points.
        self.tableService = TableService(account_key=self.accountKey, account_name=accountName)
        self.blobService = BlobService(account_key=self.accountKey, account_name=accountName)
        # Serialized jobs table
        self.jobItems = None
        # Job<->file mapping table
        self.jobFileIDs = None
        # Container for all shared and unshared files
        self.files = None
        # Stats and logging strings
        self.statsFiles = None
        # File IDs that contain stats and logging strings
        self.statsFileIDs = None

    @property
    def keyPath(self):
        return self.config.cseKey

    def initialize(self, config):
        if self._jobStoreExists():
            raise JobStoreExistsException(self.locator)
        logger.debug("Creating job store at '%s'" % self.locator)
        self._bind(create=True)
        super(AzureJobStore, self).initialize(config)

    def resume(self):
        if not self._jobStoreExists():
            raise NoSuchJobStoreException(self.locator)
        logger.debug("Using existing job store at '%s'" % self.locator)
        self._bind(create=False)
        super(AzureJobStore, self).resume()

    def destroy(self):
        self._bind()
        for name in 'jobItems', 'jobFileIDs', 'files', 'statsFiles', 'statsFileIDs':
            resource = getattr(self, name)
            if resource is not None:
                if isinstance(resource, AzureTable):
                    resource.delete_table()
                elif isinstance(resource, AzureBlobContainer):
                    resource.delete_container()
                else:
                    assert False
                setattr(self, name, None)

    def _jobStoreExists(self):
        """
        Checks if job store exists by querying the existence of the statsFileIDs table. Note that
        this is the last component that is deleted in :meth:`.destroy`.
        """
        for attempt in retry_azure():
            with attempt:
                try:
                    table = self.tableService.query_tables(table_name=self._qualify('statsFileIDs'))
                except AzureMissingResourceHttpError as e:
                    if e.status_code == 404:
                        return False
                    else:
                        raise
                else:
                    return table is not None

    def _bind(self, create=False):
        table = self._bindTable
        container = self._bindContainer
        for name, binder in (('jobItems', table),
                             ('jobFileIDs', table),
                             ('files', container),
                             ('statsFiles', container),
                             ('statsFileIDs', table)):
            if getattr(self, name) is None:
                setattr(self, name, binder(self._qualify(name), create=create))

    def _qualify(self, name):
        return self.namePrefix + self.nameSeparator + name.lower()

    def jobs(self):

        # How many jobs have we done?
        total_processed = 0

        for jobEntity in self.jobItems.query_entities_auto():
            # Process the items in the page
            yield AzureJob.fromEntity(jobEntity)
            total_processed += 1

            if total_processed % 1000 == 0:
                # Produce some feedback for the user, because this can take
                # a long time on, for example, Azure
                logger.debug("Processed %d total jobs" % total_processed)

        logger.debug("Processed %d total jobs" % total_processed)

    def create(self, jobNode):
        jobStoreID = self._newJobID()
        job = AzureJob.fromJobNode(jobNode, jobStoreID, self._defaultTryCount())
        entity = job.toItem(chunkSize=self.jobChunkSize)
        entity['RowKey'] = EntityProperty('Edm.String', jobStoreID)
        self.jobItems.insert_entity(entity=entity)
        return job

    def exists(self, jobStoreID):
        if self.jobItems.get_entity(row_key=bytes(jobStoreID)) is None:
            return False
        return True

    def load(self, jobStoreID):
        jobEntity = self.jobItems.get_entity(row_key=bytes(jobStoreID))
        if jobEntity is None:
            raise NoSuchJobException(jobStoreID)
        return AzureJob.fromEntity(jobEntity)

    def update(self, job):
        self.jobItems.update_entity(row_key=bytes(job.jobStoreID),
                                    entity=job.toItem(chunkSize=self.jobChunkSize))

    def delete(self, jobStoreID):
        try:
            self.jobItems.delete_entity(row_key=bytes(jobStoreID))
        except AzureMissingResourceHttpError:
            # Job deletion is idempotent, and this job has been deleted already
            return
        filterString = "PartitionKey eq '%s'" % jobStoreID
        for fileEntity in self.jobFileIDs.query_entities(filter=filterString):
            jobStoreFileID = fileEntity.RowKey
            self.deleteFile(jobStoreFileID)

    def getEnv(self):
        return dict(AZURE_ACCOUNT_KEY=self.accountKey)

    class BlobInfo(namedtuple('BlobInfo', ('account', 'container', 'name'))):
        @property
        @memoize
        def service(self):
            return BlobService(account_name=self.account,
                               account_key=_fetchAzureAccountKey(self.account))

    @classmethod
    def getSize(cls, url):
        blob = cls._parseWasbUrl(url)
        blobProps = blob.service.get_blob_properties(blob.container, blob.name)
        return int(blobProps['content-length'])

    @classmethod
    def _readFromUrl(cls, url, writable):
        blob = cls._parseWasbUrl(url)
        for attempt in retry_azure():
            with attempt:
                blob.service.get_blob_to_file(container_name=blob.container,
                                              blob_name=blob.name,
                                              stream=writable)

    @classmethod
    def _writeToUrl(cls, readable, url):
        blob = cls._parseWasbUrl(url)
        blob.service.put_block_blob_from_file(container_name=blob.container,
                                              blob_name=blob.name,
                                              stream=readable)

    @classmethod
    def _parseWasbUrl(cls, url):
        """
        :param urlparse.ParseResult url: x
        :rtype: AzureJobStore.BlobInfo
        """
        assert url.scheme in ('wasb', 'wasbs')
        try:
            container, account = url.netloc.split('@')
        except ValueError:
            raise InvalidImportExportUrlException(url)
        suffix = '.blob.core.windows.net'
        if account.endswith(suffix):
            account = account[:-len(suffix)]
        else:
            raise InvalidImportExportUrlException(url)
        assert url.path[0] == '/'
        return cls.BlobInfo(account=account, container=container, name=url.path[1:])

    @classmethod
    def _supportsUrl(cls, url, export=False):
        return url.scheme.lower() in ('wasb', 'wasbs')

    def writeFile(self, localFilePath, jobStoreID=None):
        jobStoreFileID = self._newFileID()
        self.updateFile(jobStoreFileID, localFilePath)
        self._associateFileWithJob(jobStoreFileID, jobStoreID)
        return jobStoreFileID

    def updateFile(self, jobStoreFileID, localFilePath):
        with open(localFilePath) as read_fd:
            with self._uploadStream(jobStoreFileID, self.files) as write_fd:
                while True:
                    buf = read_fd.read(self._maxAzureBlockBytes)
                    write_fd.write(buf)
                    if len(buf) == 0:
                        break

    def readFile(self, jobStoreFileID, localFilePath):
        try:
            with self._downloadStream(jobStoreFileID, self.files) as read_fd:
                with open(localFilePath, 'w') as write_fd:
                    while True:
                        buf = read_fd.read(self._maxAzureBlockBytes)
                        write_fd.write(buf)
                        if not buf:
                            break
        except AzureMissingResourceHttpError:
            raise NoSuchFileException(jobStoreFileID)

    def deleteFile(self, jobStoreFileID):
        try:
            self.files.delete_blob(blob_name=bytes(jobStoreFileID))
            self._dissociateFileFromJob(jobStoreFileID)
        except AzureMissingResourceHttpError:
            pass

    def fileExists(self, jobStoreFileID):
        # As Azure doesn't have a blob_exists method (at least in the
        # python API) we just try to download the metadata, and hope
        # the metadata is small so the call will be fast.
        try:
            self.files.get_blob_metadata(blob_name=bytes(jobStoreFileID))
            return True
        except AzureMissingResourceHttpError:
            return False

    @contextmanager
    def writeFileStream(self, jobStoreID=None):
        # TODO: this (and all stream methods) should probably use the
        # Append Blob type, but that is not currently supported by the
        # Azure Python API.
        jobStoreFileID = self._newFileID()
        with self._uploadStream(jobStoreFileID, self.files) as fd:
            yield fd, jobStoreFileID
        self._associateFileWithJob(jobStoreFileID, jobStoreID)

    @contextmanager
    def updateFileStream(self, jobStoreFileID):
        with self._uploadStream(jobStoreFileID, self.files, checkForModification=True) as fd:
            yield fd

    def getEmptyFileStoreID(self, jobStoreID=None):
        jobStoreFileID = self._newFileID()
        with self._uploadStream(jobStoreFileID, self.files) as _:
            pass
        self._associateFileWithJob(jobStoreFileID, jobStoreID)
        return jobStoreFileID

    @contextmanager
    def readFileStream(self, jobStoreFileID):
        if not self.fileExists(jobStoreFileID):
            raise NoSuchFileException(jobStoreFileID)
        with self._downloadStream(jobStoreFileID, self.files) as fd:
            yield fd

    @contextmanager
    def writeSharedFileStream(self, sharedFileName, isProtected=None):
        assert self._validateSharedFileName(sharedFileName)
        sharedFileID = self._newFileID(sharedFileName)
        with self._uploadStream(sharedFileID, self.files, encrypted=isProtected) as fd:
            yield fd

    @contextmanager
    def readSharedFileStream(self, sharedFileName):
        assert self._validateSharedFileName(sharedFileName)
        sharedFileID = self._newFileID(sharedFileName)
        if not self.fileExists(sharedFileID):
            raise NoSuchFileException(sharedFileID)
        with self._downloadStream(sharedFileID, self.files) as fd:
            yield fd

    def writeStatsAndLogging(self, statsAndLoggingString):
        # TODO: would be a great use case for the append blobs, once implemented in the Azure SDK
        jobStoreFileID = self._newFileID()
        encrypted = self.keyPath is not None
        if encrypted:
            statsAndLoggingString = encryption.encrypt(statsAndLoggingString, self.keyPath)
        self.statsFiles.put_block_blob_from_text(blob_name=bytes(jobStoreFileID),
                                                 text=statsAndLoggingString,
                                                 x_ms_meta_name_values=dict(
                                                     encrypted=str(encrypted)))
        self.statsFileIDs.insert_entity(entity={'RowKey': jobStoreFileID})

    def readStatsAndLogging(self, callback, readAll=False):
        suffix = '_old'
        numStatsFiles = 0
        for attempt in retry_azure():
            with attempt:
                for entity in self.statsFileIDs.query_entities():
                    jobStoreFileID = entity.RowKey
                    hasBeenRead = len(jobStoreFileID) > self.jobIDLength
                    if not hasBeenRead:
                        with self._downloadStream(jobStoreFileID, self.statsFiles) as fd:
                            callback(fd)
                        # Mark this entity as read by appending the suffix
                        self.statsFileIDs.insert_entity(entity={'RowKey': jobStoreFileID + suffix})
                        self.statsFileIDs.delete_entity(row_key=bytes(jobStoreFileID))
                        numStatsFiles += 1
                    elif readAll:
                        # Strip the suffix to get the original ID
                        jobStoreFileID = jobStoreFileID[:-len(suffix)]
                        with self._downloadStream(jobStoreFileID, self.statsFiles) as fd:
                            callback(fd)
                        numStatsFiles += 1
        return numStatsFiles

    _azureTimeFormat = "%Y-%m-%dT%H:%M:%SZ"

    def getPublicUrl(self, jobStoreFileID):
        try:
            self.files.get_blob_properties(blob_name=bytes(jobStoreFileID))
        except AzureMissingResourceHttpError:
            raise NoSuchFileException(jobStoreFileID)
        # Compensate for a little bit of clock skew
        startTimeStr = (datetime.utcnow() - timedelta(minutes=5)).strftime(self._azureTimeFormat)
        endTime = datetime.utcnow() + self.publicUrlExpiration
        endTimeStr = endTime.strftime(self._azureTimeFormat)
        sap = SharedAccessPolicy(AccessPolicy(startTimeStr, endTimeStr,
                                              BlobSharedAccessPermissions.READ))
        sas_token = self.files.generate_shared_access_signature(blob_name=bytes(jobStoreFileID),
                                                                shared_access_policy=sap)
        return self.files.make_blob_url(blob_name=bytes(jobStoreFileID)) + '?' + sas_token

    def getSharedPublicUrl(self, sharedFileName):
        jobStoreFileID = self._newFileID(sharedFileName)
        return self.getPublicUrl(jobStoreFileID)

    def _newJobID(self):
        # raw UUIDs don't work for Azure property names because the '-' character is disallowed.
        return str(uuid.uuid4()).replace('-', '_')

    # A dummy job ID under which all shared files are stored.
    sharedFileJobID = uuid.UUID('891f7db6-e4d9-4221-a58e-ab6cc4395f94')

    def _newFileID(self, sharedFileName=None):
        if sharedFileName is None:
            ret = bytes(uuid.uuid4())
        else:
            ret = bytes(uuid.uuid5(self.sharedFileJobID, bytes(sharedFileName)))
        return ret.replace('-', '_')

    def _associateFileWithJob(self, jobStoreFileID, jobStoreID=None):
        if jobStoreID is not None:
            self.jobFileIDs.insert_entity(entity={'PartitionKey': EntityProperty('Edm.String', jobStoreID),
                                                  'RowKey': EntityProperty('Edm.String', jobStoreFileID)})

    def _dissociateFileFromJob(self, jobStoreFileID):
        entities = self.jobFileIDs.query_entities(filter="RowKey eq '%s'" % jobStoreFileID)
        if entities:
            assert len(entities) == 1
            jobStoreID = entities[0].PartitionKey
            self.jobFileIDs.delete_entity(partition_key=bytes(jobStoreID), row_key=bytes(jobStoreFileID))

    def _bindTable(self, tableName, create=False):
        for attempt in retry_azure():
            with attempt:
                try:
                    tables = self.tableService.query_tables(table_name=tableName)
                except AzureMissingResourceHttpError as e:
                    if e.status_code != 404:
                        raise
                else:
                    if tables:
                        assert tables[0].name == tableName
                        return AzureTable(self.tableService, tableName)
                if create:
                    self.tableService.create_table(tableName)
                    return AzureTable(self.tableService, tableName)
                else:
                    return None

    def _bindContainer(self, containerName, create=False):
        for attempt in retry_azure():
            with attempt:
                try:
                    self.blobService.get_container_properties(containerName)
                except AzureMissingResourceHttpError as e:
                    if e.status_code == 404:
                        if create:
                            self.blobService.create_container(containerName)
                        else:
                            return None
                    else:
                        raise
        return AzureBlobContainer(self.blobService, containerName)

    def _sanitizeTableName(self, tableName):
        """
        Azure table names must start with a letter and be alphanumeric.

        This will never cause a collision if uuids are used, but
        otherwise may not be safe.
        """
        return 'a' + ''.join([x for x in tableName if x.isalnum()])

    # Maximum bytes that can be in any block of an Azure block blob
    # https://github.com/Azure/azure-storage-python/blob/4c7666e05a9556c10154508335738ee44d7cb104/azure/storage/blob/blobservice.py#L106
    _maxAzureBlockBytes = 4 * 1024 * 1024

    @contextmanager
    def _uploadStream(self, jobStoreFileID, container, checkForModification=False, encrypted=None):
        """
        :param encrypted: True to enforce encryption (will raise exception unless key is set),
        False to prevent encryption or None to encrypt if key is set.
        """
        if checkForModification:
            try:
                expectedVersion = container.get_blob_properties(blob_name=bytes(jobStoreFileID))['etag']
            except AzureMissingResourceHttpError:
                expectedVersion = None

        if encrypted is None:
            encrypted = self.keyPath is not None
        elif encrypted:
            if self.keyPath is None:
                raise RuntimeError('Encryption requested but no key was provided')

        maxBlockSize = self._maxAzureBlockBytes
        if encrypted:
            # There is a small overhead for encrypted data.
            maxBlockSize -= encryption.overhead

        store = self

        class UploadPipe(WritablePipe):

            def readFrom(self, readable):
                blockIDs = []
                try:
                    while True:
                        buf = readable.read(maxBlockSize)
                        if len(buf) == 0:
                            # We're safe to break here even if we never read anything, since
                            # putting an empty block list creates an empty blob.
                            break
                        if encrypted:
                            buf = encryption.encrypt(buf, store.keyPath)
                        blockID = store._newFileID()
                        container.put_block(blob_name=bytes(jobStoreFileID),
                                            block=buf,
                                            blockid=blockID)
                        blockIDs.append(blockID)
                except:
                    with panic(log=logger):
                        # This is guaranteed to delete any uncommitted blocks.
                        container.delete_blob(blob_name=bytes(jobStoreFileID))

                if checkForModification and expectedVersion is not None:
                    # Acquire a (60-second) write lock,
                    leaseID = container.lease_blob(blob_name=bytes(jobStoreFileID),
                                                   x_ms_lease_action='acquire')['x-ms-lease-id']
                    # check for modification,
                    blobProperties = container.get_blob_properties(blob_name=bytes(jobStoreFileID))
                    if blobProperties['etag'] != expectedVersion:
                        container.lease_blob(blob_name=bytes(jobStoreFileID),
                                             x_ms_lease_action='release',
                                             x_ms_lease_id=leaseID)
                        raise ConcurrentFileModificationException(jobStoreFileID)
                    # commit the file,
                    container.put_block_list(blob_name=bytes(jobStoreFileID),
                                             block_list=blockIDs,
                                             x_ms_lease_id=leaseID,
                                             x_ms_meta_name_values=dict(
                                                 encrypted=str(encrypted)))
                    # then release the lock.
                    container.lease_blob(blob_name=bytes(jobStoreFileID),
                                         x_ms_lease_action='release',
                                         x_ms_lease_id=leaseID)
                else:
                    # No need to check for modification, just blindly write over whatever
                    # was there.
                    container.put_block_list(blob_name=bytes(jobStoreFileID),
                                             block_list=blockIDs,
                                             x_ms_meta_name_values=dict(encrypted=str(encrypted)))

        with UploadPipe() as writable:
            yield writable

    @contextmanager
    def _downloadStream(self, jobStoreFileID, container):
        # The reason this is not in the writer is so we catch non-existent blobs early

        blobProps = container.get_blob_properties(blob_name=bytes(jobStoreFileID))

        encrypted = strict_bool(blobProps['x-ms-meta-encrypted'])
        if encrypted and self.keyPath is None:
            raise AssertionError('Content is encrypted but no key was provided.')

        outer_self = self

        class DownloadPipe(ReadablePipe):
            def writeTo(self, writable):
                chunkStart = 0
                fileSize = int(blobProps['Content-Length'])
                while chunkStart < fileSize:
                    chunkEnd = chunkStart + outer_self._maxAzureBlockBytes - 1
                    buf = container.get_blob(blob_name=bytes(jobStoreFileID),
                                             x_ms_range="bytes=%d-%d" % (chunkStart, chunkEnd))
                    if encrypted:
                        buf = encryption.decrypt(buf, outer_self.keyPath)
                    writable.write(buf)
                    chunkStart = chunkEnd + 1

        with DownloadPipe() as readable:
            yield readable
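For orientation, the locator parsed in `__init__` is `accountName:namePrefix`; a hypothetical construction (`config` would be a Toil Config object):

store = AzureJobStore('myaccount:my-run-1')
store.initialize(config)  # fresh job store: creates the tables and containers
# ...or, to reattach to an existing store:
store = AzureJobStore('myaccount:my-run-1')
store.resume()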
Example No. 19
import logging
from mimetypes import guess_type
from os.path import basename
from urllib.request import urlretrieve  # on Python 2: from urllib import urlretrieve

from azure.storage.blob import BlobService

logging.basicConfig(
    filename="/tmp/telegram.log",
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO)

logger = logging.getLogger(__name__)

ACCOUNT = 'msgtest'
CONTAINER = 'telegram'

blob_service = BlobService(
    account_name='msgtest',
    account_key=
    'sJQjZXgR/IUH4o4/CmbXue3DGxRgwkzy0SILxJMSgmd26lFCXUdqrtwwjmEPU9CrcIvoJG3yv6L0R55o9BqnXw=='
)

blob_service.create_container(CONTAINER, x_ms_blob_public_access='container')


def uploadblob(fileidshort, mediaurl):
    global ACCOUNT
    fileid = fileidshort + basename(mediaurl)
    tmppath = '/tmp/' + fileid
    urlretrieve(mediaurl, tmppath)
    # guess_type returns a (type, encoding) tuple; only the type string is wanted
    blob_service.put_block_blob_from_path(
        CONTAINER, fileid, tmppath, x_ms_blob_content_type=guess_type(tmppath)[0])
    return 'https://%s.blob.core.windows.net/%s/%s' % (ACCOUNT, CONTAINER,
                                                       fileid)


def downloadblob(fileid, filename):
    blob_service.get_blob_to_path(CONTAINER, fileid, filename)
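Round-trip sketch with placeholder identifiers:

url = uploadblob('abc123-', 'https://example.com/photo.jpg')  # placeholder URL
downloadblob('abc123-photo.jpg', '/tmp/photo.jpg')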
Example No. 20
class AzureIOStore(IOStore):
    """
    A class that lets you get input from and send output to Azure Storage.
    
    """
    def __init__(self, account_name, container_name, name_prefix=""):
        """
        Make a new AzureIOStore that reads from and writes to the given
        container in the given account, adding the given prefix to keys. All
        paths will be interpreted as keys or key prefixes.
        
        If the name prefix does not end with a trailing slash, and is not empty,
        one will be added automatically.
        
        Account keys are retrieved from the AZURE_ACCOUNT_KEY environment
        variable or from the ~/.toilAzureCredentials file, as in Toil itself.
        
        """

        # Make sure azure libraries actually loaded
        assert (have_azure)

        self.account_name = account_name
        self.container_name = container_name
        self.name_prefix = name_prefix

        if self.name_prefix != "" and not self.name_prefix.endswith("/"):
            # Make sure it has the trailing slash required.
            self.name_prefix += "/"

        # Sneak into Toil and use the same keys it uses
        self.account_key = toil.jobStores.azureJobStore._fetchAzureAccountKey(
            self.account_name)

        # This will hold our Azure blob store connection
        self.connection = None

    def __getstate__(self):
        """
        Return the state to use for pickling. We don't want to try and pickle
        an open Azure connection.
        """

        return (self.account_name, self.account_key, self.container_name,
                self.name_prefix)

    def __setstate__(self, state):
        """
        Set up after unpickling.
        """

        self.account_name = state[0]
        self.account_key = state[1]
        self.container_name = state[2]
        self.name_prefix = state[3]

        self.connection = None

    def __connect(self):
        """
        Make sure we have an Azure connection, and set one up if we don't.
        """

        if self.connection is None:
            RealtimeLogger.debug("Connecting to account {}, using "
                                 "container {} and prefix {}".format(
                                     self.account_name, self.container_name,
                                     self.name_prefix))

            # Connect to the blob service where we keep everything
            self.connection = BlobService(account_name=self.account_name,
                                          account_key=self.account_key)

    @backoff
    def read_input_file(self, input_path, local_path):
        """
        Get input from Azure.
        """

        self.__connect()

        RealtimeLogger.debug("Loading {} from AzureIOStore".format(input_path))

        # Download the blob. This is known to be synchronous, although it can
        # call a callback during the process.
        self.connection.get_blob_to_path(self.container_name,
                                         self.name_prefix + input_path,
                                         local_path)

    def list_input_directory(self,
                             input_path,
                             recursive=False,
                             with_times=False):
        """
        Loop over fake /-delimited directories on Azure. The prefix may or may
        not have a trailing slash; if not, one will be added automatically.
        
        Returns the names of files and fake directories in the given input fake
        directory, non-recursively.
        
        If with_times is specified, will yield (name, time) pairs including
        modification times as datetime objects. Times on directories are None.
        
        """

        self.__connect()

        RealtimeLogger.info(
            "Enumerating {} from AzureIOStore".format(input_path))

        # Work out what the directory name to list is
        fake_directory = self.name_prefix + input_path

        if fake_directory != "" and not fake_directory.endswith("/"):
            # We have a nonempty prefix, and we need to end it with a slash
            fake_directory += "/"

        # This will hold the marker that we need to send back to get the next
        # page, if there is one. See <http://stackoverflow.com/a/24303682>
        marker = None

        # This holds the subdirectories we found; we yield each exactly once if
        # we aren't recursing.
        subdirectories = set()

        while True:

            # Get the results from Azure. We don't use delimiter since Azure
            # doesn't seem to provide the placeholder entries it's supposed to.
            result = self.connection.list_blobs(self.container_name,
                                                prefix=fake_directory,
                                                marker=marker)

            RealtimeLogger.info("Found {} files".format(len(result)))

            for blob in result:
                # Yield each result's blob name, but directory names only once

                # Drop the common prefix
                relative_path = blob.name[len(fake_directory):]

                if (not recursive) and "/" in relative_path:
                    # We found a file in a subdirectory, and we aren't supposed
                    # to be recursing.
                    subdirectory, _ = relative_path.split("/", 1)

                    if subdirectory not in subdirectories:
                        # It's a new subdirectory. Yield and remember it
                        subdirectories.add(subdirectory)

                        if with_times:
                            yield subdirectory, None
                        else:
                            yield subdirectory
                else:
                    # We found an actual file
                    if with_times:
                        mtime = blob.properties.last_modified

                        if isinstance(mtime, datetime.datetime):
                            # Make sure we're getting proper localized datetimes
                            # from the new Azure Storage API.
                            assert (mtime.tzinfo is not None and
                                    mtime.tzinfo.utcoffset(mtime) is not None)
                        else:
                            # Convert mtime from a string as in the old API.
                            mtime = dateutil.parser.parse(mtime).replace(
                                tzinfo=dateutil.tz.tzutc())

                        yield relative_path, mtime

                    else:
                        yield relative_path

            # Save the marker
            marker = result.next_marker

            if not marker:
                break

    @backoff
    def write_output_file(self, local_path, output_path):
        """
        Write output to Azure. Will create the container if necessary.
        """

        self.__connect()

        RealtimeLogger.debug("Saving {} to AzureIOStore".format(output_path))

        try:
            # Make the container
            self.connection.create_container(self.container_name)
        except azure.WindowsAzureConflictError:
            # The container probably already exists
            pass

        # Upload the blob (synchronously)
        # TODO: catch no container error here, make the container, and retry
        self.connection.put_block_blob_from_path(
            self.container_name, self.name_prefix + output_path, local_path)
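    # One way to discharge the TODO above -- a hypothetical helper, not part
    # of the original class. The exact exception class differs across old
    # azure SDK versions; azure.WindowsAzureMissingResourceError is assumed.
    def _upload_with_container_retry(self, local_path, output_path):
        try:
            # First attempt, assuming the container exists
            self.connection.put_block_blob_from_path(
                self.container_name, self.name_prefix + output_path,
                local_path)
        except azure.WindowsAzureMissingResourceError:
            # Container was missing: create it and retry once
            self.connection.create_container(self.container_name)
            self.connection.put_block_blob_from_path(
                self.container_name, self.name_prefix + output_path,
                local_path)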

    @backoff
    def exists(self, path):
        """
        Returns true if the given input or output file exists in Azure already.
        
        """

        self.__connect()

        marker = None

        while True:

            try:
                # Make the container
                self.connection.create_container(self.container_name)
            except azure.WindowsAzureConflictError:
                # The container probably already exists
                pass

            # Get the results from Azure.
            result = self.connection.list_blobs(self.container_name,
                                                prefix=self.name_prefix + path,
                                                marker=marker)

            for blob in result:
                # Look at each blob

                if blob.name == self.name_prefix + path:
                    # Found it
                    return True

            # Save the marker
            marker = result.next_marker

            if not marker:
                break

        return False

    @backoff
    def get_mtime(self, path):
        """
        Returns the modification time of the given blob if it exists, or None
        otherwise.
        
        """

        self.__connect()

        marker = None

        while True:

            # Get the results from Azure.
            result = self.connection.list_blobs(self.container_name,
                                                prefix=self.name_prefix + path,
                                                marker=marker)

            for blob in result:
                # Look at each blob

                if blob.name == self.name_prefix + path:
                    # Found it
                    mtime = blob.properties.last_modified

                    if isinstance(mtime, datetime.datetime):
                        # Make sure we're getting proper localized datetimes
                        # from the new Azure Storage API.
                        assert (mtime.tzinfo is not None
                                and mtime.tzinfo.utcoffset(mtime) is not None)
                    else:
                        # Convert mtime from a string as in the old API.
                        mtime = dateutil.parser.parse(mtime).replace(
                            tzinfo=dateutil.tz.tzutc())

                    return mtime

            # Save the marker
            marker = result.next_marker

            if not marker:
                break

        return None

    @backoff
    def get_size(self, path):
        """
        Returns the size in bytes of the given blob if it exists, or None
        otherwise.
        
        """

        self.__connect()

        marker = None

        while True:

            # Get the results from Azure.
            result = self.connection.list_blobs(self.container_name,
                                                prefix=self.name_prefix + path,
                                                marker=marker)

            for blob in result:
                # Look at each blob

                if blob.name == self.name_prefix + path:
                    # Found it
                    size = blob.properties.content_length

                    return size

            # Save the marker
            marker = result.next_marker

            if not marker:
                break

        return None
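# Hypothetical use of the IOStore above (account, container and paths are
# made up for illustration):
#
#   store = AzureIOStore('myaccount', 'mycontainer', 'run1')
#   if store.exists('stats/summary.tsv'):
#       print store.get_size('stats/summary.tsv')
#       print store.get_mtime('stats/summary.tsv')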
Example 21
class AzureStorage(Storage):
    account_name = settings.AZURE_ACCOUNT_NAME
    account_key = settings.AZURE_ACCOUNT_KEY
    azure_container = settings.AZURE_CONTAINER

    def __init__(self, *args, **kwargs):
        super(AzureStorage, self).__init__(*args, **kwargs)
        self._connection = None

    @property
    def connection(self):
        if self._connection is None:
            # Create connection
            self._connection = BlobService(self.account_name, self.account_key)

            # Create container if needed
            containers = [
                c for c in self._connection.list_containers(
                    prefix=self.azure_container)
                if c.name == self.azure_container
            ]
            if len(containers) == 0:
                self._connection.create_container(
                    self.azure_container,
                    {'origin': 'created by Django web app'},
                    fail_on_exist=True)

        return self._connection

    def _open(self, name, mode="rb"):
        stream = SimpleUploadedFile(name, None)
        self.connection.get_blob_to_file(self.azure_container, name, stream)
        stream.seek(0)
        return stream

    def exists(self, name):
        try:
            self.connection.get_blob_properties(self.azure_container, name)
        except AzureMissingResourceHttpError:
            return False
        else:
            return True

    def delete(self, name):
        self.connection.delete_blob(self.azure_container, name)

    def size(self, name):
        properties = self.connection.get_blob_properties(
            self.azure_container, name)
        return properties["content-length"]

    def _save(self, name, content):
        self.connection.put_block_blob_from_file(self.azure_container, name,
                                                 content)
        return name

    def url(self, name):
        ap = AccessPolicy(
            expiry=(timezone.datetime.utcnow() +
                    timezone.timedelta(seconds=600)).strftime('%Y-%m-%dT%H:%M:%SZ'),
            start=(timezone.datetime.utcnow() -
                   timezone.timedelta(seconds=600)).strftime('%Y-%m-%dT%H:%M:%SZ'),
            permission='r')
        sap = SharedAccessPolicy(ap)
        sas = SharedAccessSignature(self.account_name, self.account_key)
        url = sas.generate_signed_query_string(path=self.azure_container +
                                               '/' + name,
                                               resource_type='b',
                                               shared_access_policy=sap)

        return self.connection.make_blob_url(self.azure_container,
                                             name) + "?" + url
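# A minimal sketch of wiring the backend above into Django settings. The
# module path and values below are assumptions; only the three setting names
# are taken from the class itself:
#
#   AZURE_ACCOUNT_NAME = 'myaccount'
#   AZURE_ACCOUNT_KEY = '<key>'
#   AZURE_CONTAINER = 'media'
#   DEFAULT_FILE_STORAGE = 'myproject.storage.AzureStorage'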
Example 22
from azure.storage import AccessPolicy, SharedAccessPolicy, SignedIdentifier, SignedIdentifiers
from azure.storage.blob import BlobService, ContainerSharedAccessPermissions

# The name of the new Shared Access policy
policy_name = 'readandlistonly'
# The Storage Account Name
storage_account_name = 'larryfrstore'
storage_account_key = 'Vm7YUAvuKQFjNSv2xY3ckgxwLUMkECUHNFF09lipZY2QxNgTFDHbA7o4U6joHXg+/Wd23sHkukjZUp41siTtwQ=='
storage_container_name = 'mycontainer'
example_file_path = '..\\sampledata\\sample.log'

# Create the blob service, using the name and key for your Azure Storage account
blob_service = BlobService(storage_account_name, storage_account_key)

# Create the container, if it does not already exist
blob_service.create_container(storage_container_name)

# Upload an example file to the container
blob_service.put_block_blob_from_path(
    storage_container_name,
    'sample.log',
    example_file_path,
)

# Create a new signed identifier (policy)
si = SignedIdentifier()
# Set the name
si.id = policy_name
# Set the expiration date
si.access_policy.expiry = '2016-01-01'
# Set the permissions. Read and List in this example
Example 23
from azure.storage.blob import BlobService
from mimetypes import guess_type

ACCOUNT = 'msgtest'
CONTAINER = 'telegram'

blob_service = BlobService(account_name=ACCOUNT, account_key='sJQjZXgR/IUH4o4/CmbXue3DGxRgwkzy0SILxJMSgmd26lFCXUdqrtwwjmEPU9CrcIvoJG3yv6L0R55o9BqnXw==')

blob_service.create_container(CONTAINER, x_ms_blob_public_access='container')


def putblob(fileid, filename):
    global ACCOUNT
    blob_service.put_block_blob_from_path(
        CONTAINER,
        fileid,
        filename,
        # guess_type() returns a (type, encoding) tuple; pass only the type
        x_ms_blob_content_type=guess_type(filename)[0]
    )
    return 'https://%s.blob.core.windows.net/%s/%s' %(ACCOUNT, CONTAINER, fileid)

putblob('quotes.pkl', 'quotes.pkl')


blobs = []
marker = None
while True:
    batch = blob_service.list_blobs(CONTAINER, marker=marker)
    blobs.extend(batch)
    if not batch.next_marker:
        break
    marker = batch.next_marker
Example 24
class Azure(object):
    '''
    A class used to connect to the Azure storage and
    upload/download files using blob storage
    '''
    def __init__(self, params={}):
        '''
        Constructor for the Azure object

        '''
        if "user" in params:
            self.user = params["user"]
        else:
            self.user = None
        if "key" in params:
            self.key = params["key"]
        else:
            self.key = None

    def connect(self, host, port, user, password, secure):
        '''
        Connect to the Azure service with the given credentials
        @param host - optional host base appended to the account name
        @param port - unused for Azure blob storage
        @param user - account name to use (falls back to self.user)
        @param password - account key to use (falls back to self.key)
        @param secure - use https if True, otherwise http
        '''
        kwargs = {}
        err = None
        if host is not None:
            kwargs["host_base"] = "." + host
        if user is not None:
            kwargs["account_name"] = user
        elif self.user is not None:
            kwargs["account_name"] = self.user
        if password is not None:
            kwargs["account_key"] = password
        elif self.key is not None:
            kwargs["account_key"] = self.key
        kwargs["protocol"] = "https" if secure else "http"
        try:
            self.service = BlobService(**kwargs)
        except Exception as e:
            err = e.message
            self.service = None
        if self.service is None:
            raise OsakaException("Failed to connect to Azure:" +
                                 ("" if err is None else err))

    @classmethod
    def getSchemes(clazz):
        '''
        Returns a list of schemes this handler handles
        Note: handling the scheme of another handler produces unknown results
        @returns list of handled schemes
        '''
        return ["azure", "azures"]

    def close(self):
        '''
        Close this service
        '''
        pass

    def put(self, path, url):
        '''
        Put a file up to the cloud
        @param path - path to upload
        @param url - path in the cloud to upload to
        '''
        if os.path.isdir(path):
            return walk(self.put, path, url)
        cont, blob = get_container_and_path(urlparse.urlparse(url).path)
        self.service.create_container(cont)
        self.service.put_block_blob_from_path(cont, blob, path)
        return True

    def get(self, url, dest):
        '''
        Get file(s) from the cloud
        @param url - url on cloud to pull down (on cloud)
        @param dest - destination to download to
        '''
        cont, blob = get_container_and_path(urlparse.urlparse(url).path)
        for b in self.service.list_blobs(cont, prefix=blob):
            destination = os.path.join(dest, os.path.relpath(
                b.name, blob)) if blob != b.name else dest
            if not os.path.exists(os.path.dirname(destination)):
                # makedirs, not mkdir: intermediate directories may be missing
                os.makedirs(os.path.dirname(destination))
            self.service.get_blob_to_path(cont, b.name, destination)
        return True

    def rm(self, url):
        '''
        Remove this url and all children urls
        @param url - url to remove
        '''
        cont, blob = get_container_and_path(urlparse.urlparse(url).path)
        for b in self.service.list_blobs(cont, prefix=blob):
            self.service.delete_blob(cont, b.name)
        return True
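# get_container_and_path() is used above but not shown in this snippet. A
# minimal sketch of what it presumably does (an assumption, not the original
# helper):
def get_container_and_path(path):
    # "/mycontainer/some/key" -> ("mycontainer", "some/key")
    parts = path.lstrip("/").split("/", 1)
    return parts[0], parts[1] if len(parts) > 1 else ""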
Example 26
    file.seek(0)
    path = save_file(file, filename_md5)
    photo = Photo(creator_id = user.id, path = path)
    db.session.add(photo)
    db.session.commit()
    
    return "{0}".format(photo.id)
    
@app.route("/api/<session_key>/photo/<int:photo_id>", methods = ['GET'])
def photo_get(session_key, photo_id):
    session = Session.query.filter_by(session_key = session_key).first()
    if session is not None:
        user = User.query.filter_by(id = session.user_id).first()
    else:
        return "Error: Not Logged In."
    
    photo = Photo.query.filter_by(id = photo_id).first()
    
    if photo is not None:
        return get_file(photo.path)
    else:
        return "Error: Photo not found."

if __name__ == "__main__":
    if len(sys.argv) > 1 and sys.argv[1] == 'create_db':
        db.create_all()
    elif len(sys.argv) > 1 and sys.argv[1] == 'create_storage':
        blob_service.create_container('photos')
        
    else:
        app.run(host = '0.0.0.0', port=5000, debug = True)
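# save_file() and get_file() are used above but defined in a part of the
# application not shown here. A rough sketch of what they might look like
# with the same blob_service (assumptions, including the container name):
def save_file(file, filename):
    file.seek(0)
    blob_service.put_block_blob_from_file('photos', filename, file)
    return filename

def get_file(path):
    return blob_service.get_blob('photos', path)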
Example 27
PASSWORD      = '******'
DB_DRIVER     = 'SQL Server Native Client 11.0'

# Specify the Azure Storage Account name where the CSV file will be copied to a private blob
STORAGEACCOUNTNAME = "ENTER AZURE STORAGE ACCOUNT NAME"
# Specify the storage account key.
# You can retrieve it as the "Primary Access Key" on the Azure portal Storage account blade by clicking the "Key" icon.
# More info: https://azure.microsoft.com/en-us/documentation/articles/storage-create-storage-account/#manage-your-storage-access-keys
STORAGEKEY = "ENTER STORAGE ACCOUNT KEY"
# Read dataset
# The dataset is read from a public blob and copied to a private blob to load it into SQL DW via Polybase

f = urllib2.urlopen('https://cahandson.blob.core.windows.net/nyctaxi/nyctaxipoint1pct.csv')
taxisample = f.read()
blob_service = BlobService(account_name=STORAGEACCOUNTNAME, account_key=STORAGEKEY)
blob_service.create_container('nyctaxinb')
blob_service.put_block_blob_from_bytes(
    'nyctaxinb',
    'nyctaxipoint1pct.csv',
    taxisample
)

# Construct the SQL DW Connection string
driver = 'DRIVER={' + DB_DRIVER + '}'
server = 'SERVER=' + SERVER_NAME
database = 'DATABASE=' + DATABASE_NAME
uid = 'UID=' + USERID
pwd = 'PWD=' + PASSWORD
CONNECTION_STRING = ';'.join([driver,server,database,uid,pwd, 'Encrypt=yes;TrustServerCertificate=no'])
print CONNECTION_STRING
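# The string above is ready to hand to pyodbc -- a sketch of the connection
# step, which this snippet stops short of (pyodbc is assumed to be installed):
import pyodbc
conn = pyodbc.connect(CONNECTION_STRING)
cursor = conn.cursor()
cursor.execute("SELECT 1")
print cursor.fetchone()[0]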
Example 28
def setup_blob_service():    
    blobService = BlobService(account_name=os.environ["STORAGE_ACCOUNT"], account_key=os.environ["STORAGE_KEY"])
    blobService.create_container(TIMESERIES_CONTAINER)
    blobService.set_container_acl(TIMESERIES_CONTAINER, x_ms_blob_public_access='container')
  
    return blobService
Example 29
class AzureFS(LoggingMixIn, Operations):
    """Azure Blob Storage filesystem"""

    blobs = None
    containers = dict()  # <cname, dict(stat:dict, files:None|dict<fname, stat>)>
    fds = dict()  # <fd, (path, bytes, dirty)>
    fd = 0

    def __init__(self, account, key):
        self.blobs = BlobService(account, key)
        self.rebuild_container_list()

    def convert_to_epoch(self, date):
        """Converts Tue, 31 Jul 2012 07:17:34 GMT format to epoch"""
        return int(time.mktime(time.strptime(date, TIME_FORMAT)))

    def rebuild_container_list(self):
        cmap = dict()
        cnames = set()
        for c in self.blobs.list_containers():
            date = c.properties.last_modified
            cstat = dict(st_mode=(S_IFDIR | 0755),
                         st_uid=getuid(),
                         st_size=0,
                         st_mtime=self.convert_to_epoch(date))
            cname = c.name
            cmap['/' + cname] = dict(stat=cstat, files=None)
            cnames.add(cname)

        cmap['/'] = dict(files={},
                         stat=dict(st_mode=(S_IFDIR | 0755),
                                   st_uid=getuid(),
                                   st_size=0,
                                   st_mtime=int(time.time())))

        self.containers = cmap  # destroys fs tree cache resistant to misses

    def _parse_path(self, path):  # returns </dir, file(=None)>
        if path.count('/') > 1:  # file
            return str(path[:path.rfind('/')]), str(path[path.rfind('/') + 1:])
        else:  # dir
            pos = path.rfind('/', 1)
            if pos == -1:
                return path, None
            else:
                return str(path[:pos]), None

    def parse_container(self, path):
        base_container = path[1:]  # /abc/def/g --> abc
        if base_container.find('/') > -1:
            base_container = base_container[:base_container.find('/')]
        return str(base_container)

    def _get_dir(self, path, contents_required=False):
        if not self.containers:
            self.rebuild_container_list()

        if path in self.containers and not (contents_required and \
                self.containers[path]['files'] is None):
            return self.containers[path]

        cname = self.parse_container(path)

        if '/' + cname not in self.containers:
            raise FuseOSError(ENOENT)
        else:
            if self.containers['/' + cname]['files'] is None:
                # fetch contents of container
                log.info("------> CONTENTS NOT FOUND: %s" % cname)

                blobs = self.blobs.list_blobs(cname)

                dirstat = dict(st_mode=(S_IFDIR | 0755),
                               st_size=0,
                               st_uid=getuid(),
                               st_mtime=time.time())

                if self.containers['/' + cname]['files'] is None:
                    self.containers['/' + cname]['files'] = dict()

                for f in blobs:
                    blob_name = f.name
                    blob_date = f.properties.last_modified
                    blob_size = long(f.properties.content_length)

                    node = dict(st_mode=(S_IFREG | 0644),
                                st_size=blob_size,
                                st_mtime=self.convert_to_epoch(blob_date),
                                st_uid=getuid())

                    if blob_name.find('/') == -1:  # file just under container
                        self.containers['/' + cname]['files'][blob_name] = node

            return self.containers['/' + cname]
        return None

    def _get_file(self, path):
        d, f = self._parse_path(path)
        dir = self._get_dir(d, True)
        if dir is not None and f in dir['files']:
            return dir['files'][f]

    def getattr(self, path, fh=None):
        d, f = self._parse_path(path)

        if f is None:
            dir = self._get_dir(d)
            return dir['stat']
        else:
            file = self._get_file(path)

            if file:
                return file

        raise FuseOSError(ENOENT)

    # FUSE
    def mkdir(self, path, mode):
        if path.count('/') <= 1:  # create on root
            name = path[1:]

            if not 3 <= len(name) <= 63:
                log.error("Container names can be 3 through 63 chars long.")
                raise FuseOSError(ENAMETOOLONG)
            if name != name.lower():
                log.error("Container names cannot contain uppercase "
                          "characters.")
                raise FuseOSError(EACCES)
            if name.count('--') > 0:
                log.error('Container names cannot contain consecutive '
                          'dashes (-).')
                raise FuseOSError(EAGAIN)
            #TODO handle all "-"s must be preceded by letter or numbers
            #TODO starts with only letter or number, can contain letter, nr,'-'

            resp = self.blobs.create_container(name)

            if resp:
                self.rebuild_container_list()
                log.info("CONTAINER %s CREATED" % name)
            else:
                log.error("Invalid container name or container already "
                          "exists.")
                raise FuseOSError(EACCES)
        else:
            raise FuseOSError(ENOSYS)  # TODO support 2nd+ level mkdirs

    def rmdir(self, path):
        if path.count('/') == 1:
            c_name = path[1:]
            resp = self.blobs.delete_container(c_name)

            if resp:
                if path in self.containers:
                    del self.containers[path]
            else:
                raise FuseOSError(EACCES)
        else:
            raise FuseOSError(ENOSYS)  # TODO support 2nd+ level mkdirs

    def create(self, path, mode):
        node = dict(st_mode=(S_IFREG | mode),
                    st_size=0,
                    st_nlink=1,
                    st_uid=getuid(),
                    st_mtime=time.time())
        d, f = self._parse_path(path)

        if not f:
            log.error("Cannot create files on root level: /")
            raise FuseOSError(ENOSYS)

        dir = self._get_dir(d, True)
        if not dir:
            raise FuseOSError(EIO)
        dir['files'][f] = node

        return self.open(path, data='')  # reusing handler provider

    def open(self, path, flags=0, data=None):
        if data == None:  # download contents
            c_name = self.parse_container(path)
            f_name = path[path.find('/', 1) + 1:]

            try:
                data = self.blobs.get_blob(c_name, f_name)
            except AzureMissingResourceHttpError:
                dir = self._get_dir('/' + c_name, True)
                if f_name in dir['files']:
                    del dir['files'][f_name]
                raise FuseOSError(ENOENT)
            except AzureException as e:
                log.error("Read blob failed HTTP %d" % e.code)
                raise FuseOSError(EAGAIN)

        self.fd += 1
        self.fds[self.fd] = (path, data, False)

        return self.fd

    def flush(self, path, fh=None):
        if not fh:
            raise FuseOSError(EIO)
        else:
            if fh not in self.fds:
                raise FuseOSError(EIO)
            path = self.fds[fh][0]
            data = self.fds[fh][1]
            dirty = self.fds[fh][2]

            if not dirty:
                return 0  # avoid redundant write

            d, f = self._parse_path(path)
            c_name = self.parse_container(path)

            if data is None:
                data = ''

            try:
                if len(data) < 64 * 1024 * 1024:  # 64 mb
                    self.blobs.put_blob(c_name, f, data, 'BlockBlob')
                else:
                    # divide file by blocks and upload
                    block_size = 8 * 1024 * 1024
                    num_blocks = int(math.ceil(len(data) * 1.0 / block_size))
                    rd = str(random.randint(1, 1e8))
                    block_ids = list()

                    for i in range(num_blocks):
                        part = data[i * block_size:min((i + 1) *
                                                       block_size, len(data))]
                        block_id = base64.encodestring(
                            '%s_%s' % (rd, (8 - len(str(i))) * '0' + str(i)))
                        self.blobs.put_block(c_name, f, part, block_id)
                        block_ids.append(block_id)

                    self.blobs.put_block_list(c_name, f, block_ids)
            except AzureException:
                raise FuseOSError(EAGAIN)

            dir = self._get_dir(d, True)
            if not dir or f not in dir['files']:
                raise FuseOSError(EIO)

            # update local data
            dir['files'][f]['st_size'] = len(data)
            dir['files'][f]['st_mtime'] = time.time()
            self.fds[fh] = (path, data, False)  # mark as not dirty
            return 0

    def release(self, path, fh=None):
        if fh is not None and fh in self.fds:
            del self.fds[fh]

    def truncate(self, path, length, fh=None):
        return 0  # assume done, no need

    def write(self, path, data, offset, fh=None):
        if not fh or fh not in self.fds:
            raise FuseOSError(ENOENT)
        else:
            d = self.fds[fh][1]
            if d is None:
                d = ""
            self.fds[fh] = (self.fds[fh][0], d[:offset] + data, True)
            return len(data)

    def unlink(self, path):
        c_name = self.parse_container(path)
        d, f = self._parse_path(path)

        try:
            self.blobs.delete_blob(c_name, f)

            _dir = self._get_dir(path, True)
            if _dir and f in _dir['files']:
                del _dir['files'][f]
            return 0
        except AzureMissingResourceHttpError:
            raise FuseOSError(ENOENT)
        except Exception as e:
            raise FuseOSError(EAGAIN)

    def readdir(self, path, fh):
        if path == '/':
            return ['.', '..'] + [x[1:] for x in self.containers.keys()
                                  if x != '/']

        dir = self._get_dir(path, True)
        if not dir:
            raise FuseOSError(ENOENT)
        return ['.', '..'] + dir['files'].keys()

    def read(self, path, size, offset, fh):
        if not fh or fh not in self.fds:
            raise FuseOSError(ENOENT)

        f_name = path[path.find('/', 1) + 1:]
        c_name = path[1:path.find('/', 1)]

        try:
            data = self.blobs.get_blob(c_name, f_name)
            self.fds[fh] = (self.fds[fh][0], data, False)
            return data[offset:offset + size]
        except URLError, e:
            if e.code == 404:
                raise FuseOSError(ENOENT)
            elif e.code == 403:
                raise FuseOSError(EPERM)
            else:
                log.error("Read blob failed HTTP %d" % e.code)
                raise FuseOSError(EAGAIN)
        data = self.fds[fh][1]
        if data is None:
            data = ""
        return data[offset:offset + size]
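# A hypothetical mount script for the filesystem above, assuming fusepy
# (the original snippet does not include one; account, key and mountpoint
# are taken from the command line):
if __name__ == '__main__':
    import sys
    from fuse import FUSE
    account, key, mountpoint = sys.argv[1:4]
    FUSE(AzureFS(account, key), mountpoint, foreground=True)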
Example 30
class AzureConnector():

    def __init__(self, config):

        tree = ET.parse('SharedConfig.xml')
        self.myMachineName = tree.find('.//Instance').get("id")

        self.sms = ServiceManagementService(
            subscription_id=config.get("azure", "subscription_id"),
            cert_file=config.get("azure", "cert_file")
        );

        self.bus_service = ServiceBusService(
            service_namespace=config.get("azure", "bus_namespace"),
            shared_access_key_name=config.get("azure", "bus_shared_access_key_name"),
            shared_access_key_value=config.get("azure", "bus_shared_access_key_value"))

        self.command_queue = config.get("azure", "commandQueuePath")
        for tries in range(1, 10):
            try:
                self.bus_service.create_queue(self.command_queue)
                break
            except:
                print "Waiting"

        self.status_topic = config.get("azure", "statusTopicPath")
        # The status path is consumed via send_topic_message() below, so it
        # must be created as a topic rather than a queue.
        self.bus_service.create_topic(self.status_topic)

        self.storage = BlobService(account_name=config.get("azure", "account_name"),
                                   account_key=config.get("azure", "account_key"))

        self.algo_storage_name = config.get("azure", "algorithm_storage_name")
        self.storage.create_container(self.algo_storage_name, fail_on_exist=False)

        self.proj_storage_name = config.get("azure", "project_storage_name")
        self.storage.create_container(self.proj_storage_name, fail_on_exist=False)

    def check_new_tasks(self):

        for tries in range(1,2):
            try:
                message = self.bus_service.receive_queue_message(self.command_queue, peek_lock=False, timeout=60)
                break
            except:
                message = None

        if message is None or message.body is None:
            return None

        job_description = json.loads(message.body.replace('/AzureBlobStorage/', ''))

        command = CommandMetadata(
            command_id = job_description["command_id"],
            algorithm_directory = job_description["algorithm_prfx"],
            project_prfx = job_description["project_prfx"],
            project_input_files = job_description["project_input_files"],
            algorithm_executable_name = job_description["algorithm_executable_name"],
            algorithm_parameters = job_description["algorithm_parameters"],
            sent_timestamp = datetime.datetime.strptime(job_description["sent_timestamp"], "%d/%m/%Y %H:%M:%S"),
            machine_size=job_description["machine_size"])

        # Return data about the command consumed from the queue
        return command

    def list_algo_files(self, prfx):

        list = self.storage.list_blobs(container_name=self.algo_storage_name, prefix=prfx)
        result = []
        for blob in list:
            result.append(blob.name)
        return result

    def download_algo_zip(self, algorithm_bin_file, tmp_file):
        print "download_algo_zip(algorithm_bin_file="+algorithm_bin_file+", tmp_file="+tmp_file+")"
        for tries in range(1, 6):
            try:
                self.storage.get_blob_to_path(self.algo_storage_name, algorithm_bin_file, tmp_file,
                                 open_mode='wb', snapshot=None, x_ms_lease_id=None,
                                 progress_callback=None)
                break

            except Exception as e:

                if tries == 5:
                    print("Too many connection errors. Operation aborted.")
                else:
                    print("Service connection error. Retrying... " + e.__str__())

    def download_file_to_project(self, project_name, blob_name, dir):
        print "download_file_to_project(project_name="+project_name+", blob_name="+blob_name+", dir="+dir+")"
        for tries in range(1, 6):
            try:
                self.storage.get_blob_to_path(self.proj_storage_name,
                                              os.path.join(project_name,blob_name),
                                              os.path.join(dir,os.path.join(project_name,blob_name)),
                                              open_mode='wb', snapshot=None, x_ms_lease_id=None,
                                              progress_callback=None)
                break

            except Exception as e:

                if tries == 5:
                    print("Too many connection errors. Operation aborted.")
                else:
                    print("Service connection error. Retrying... " + e.__str__())

    def upload_proj_file(self, project_name, blob_name, dir):
        print "upload_proj_file(project_name="+project_name+", blob_name="+blob_name+", dir="+dir+")"
        if blob_name[0] == '/':
            blob_name = blob_name[1:]
        for tries in range(1, 6):
            try:
                self.storage.put_block_blob_from_path(self.proj_storage_name,
                                              os.path.join(project_name,blob_name),
                                              os.path.join(dir,os.path.join(project_name,blob_name)))
                break

            except Exception as e:

                if tries == 5:
                    print("Too many connection errors. Operation aborted.")
                else:
                    print("Service connection error. Retrying... " + e.__str__())

    def download_file_to_algo(self, blob_name, dir):
        print "download_file_to_algo(blob_name="+blob_name+", dir="+dir+")"

        for tries in range(1, 6):
            try:
                self.storage.get_blob_to_path(container_name=self.algo_storage_name,
                                              blob_name=os.path.join(blob_name),
                                              file_path=os.path.join(dir,blob_name),
                                              open_mode='wb', snapshot=None, x_ms_lease_id=None,
                                              progress_callback=None)
                break

            except Exception as e:

                if tries == 5:
                    print("Too many connection errors. Operation aborted.")
                else:
                    print("Service connection error. Retrying... " + e.__str__())


    def send_status(self, main_status):
        for tries in range(1, 6):
            try:
                self.bus_service.send_topic_message(topic_name=self.status_topic,
                                                    message=Message(main_status.encode('utf-8')))
                break

            except Exception as e:

                if tries == 5:
                    print("Too many connection errors. Operation aborted.")
                else:
                    print("Service connection error. Retrying... " + e.__str__())

    def shutdown_myself(self):

        # The virtual machine deletes itself from the cloud.
        print("Removing virtual machine from the cloud...")
        for tries in range(1, 6):
            try:
                self.sms.delete_deployment(
                    service_name=self.myMachineName,
                    deployment_name=self.myMachineName, delete_vhd=True)
                exit(0)

            except Exception as e:

                if tries == 5:
                    print("Too many connection errors. Operation aborted.")
                else:
                    print("Service connection error. Retrying... " + e.__str__())
Example 31
                                     row[KEY_FIELD],
                                     json.dumps(tileTimePeriodResultSet),
                                     "BlockBlob",
                                     x_ms_blob_cache_control="max-age=3600",
                                     x_ms_blob_content_type="application/json")
                successful = True
            except:
                print "error putting heatmap: ", sys.exc_info()[0]
                continue
    yield None


blobService = BlobService(account_name=os.environ["LOCATION_STORAGE_ACCOUNT"],
                          account_key=os.environ["LOCATION_STORAGE_KEY"])

blobService.create_container(HEATMAP_CONTAINER)
blobService.set_container_acl(HEATMAP_CONTAINER,
                              x_ms_blob_public_access='container')


def check_config():
    if not "LOCATION_STORAGE_ACCOUNT" in os.environ:
        print "Required environment variable LOCATION_STORAGE_ACCOUNT missing."

    if not "LOCATION_STORAGE_KEY" in os.environ:
        print "Required environment variable LOCATION_STORAGE_KEY missing."

    if not "LOCATIONS_ROOT" in os.environ:
        print "Required environment variable LOCATIONS_ROOT missing."

Example 32
class Command(BaseCommand):
    help = "Synchronizes static media to cloud files."

    option_list = BaseCommand.option_list + (
        optparse.make_option('-w', '--wipe',
            action='store_true', dest='wipe', default=False,
            help="Wipes out entire contents of container first."),
        optparse.make_option('-t', '--test-run',
            action='store_true', dest='test_run', default=False,
            help="Performs a test run of the sync."),
        optparse.make_option('-c', '--container',
            dest='container', help="Override STATIC_CONTAINER."),
    )

    # settings from azurite.settings
    ACCOUNT_NAME     = AZURITE['ACCOUNT_NAME']
    ACCOUNT_KEY      = AZURITE['ACCOUNT_KEY']
    STATIC_CONTAINER = AZURITE['STATIC_CONTAINER']

    # paths
    DIRECTORY        = os.path.abspath(settings.STATIC_ROOT)
    STATIC_URL       = settings.STATIC_URL

    if not DIRECTORY.endswith('/'):
        DIRECTORY = DIRECTORY + '/'

    if STATIC_URL.startswith('/'):
        STATIC_URL = STATIC_URL[1:]

    local_object_names = []
    create_count = 0
    upload_count = 0
    update_count = 0
    skip_count = 0
    delete_count = 0
    service = None

    def handle(self, *args, **options):
        self.wipe = options.get('wipe')
        self.test_run = options.get('test_run')
        self.verbosity = int(options.get('verbosity'))
        if options.get('container'):
            self.STATIC_CONTAINER = options.get('container')
        self.sync_files()

    def sync_files(self):
        self.service = BlobService(account_name=self.ACCOUNT_NAME,
            account_key=self.ACCOUNT_KEY)

        try:
            self.service.get_container_properties(self.STATIC_CONTAINER)
        except AzureMissingResourceHttpError:
            self.service.create_container(self.STATIC_CONTAINER,
                x_ms_blob_public_access='blob')

        self.service.set_container_acl(self.STATIC_CONTAINER, x_ms_blob_public_access='blob')

        # if -w option is provided, wipe out the contents of the container
        if self.wipe:
            blob_count = len(self.service.list_blobs(self.STATIC_CONTAINER))

            if self.test_run:
                print "Wipe would delete %d objects." % blob_count
            else:
                print "Deleting %d objects..." % blob_count
                for blob in self.service.list_blobs(self.STATIC_CONTAINER):
                    self.service.delete_blob(self.STATIC_CONTAINER, blob.name)

        # walk through the directory, creating or updating files on the cloud
        os.path.walk(self.DIRECTORY, self.upload_files, "foo")

        # remove any files on remote that don't exist locally
        self.delete_files()

        # print out the final tally to the cmd line
        self.update_count = self.upload_count - self.create_count
        print
        if self.test_run:
            print "Test run complete with the following results:"
        print "Skipped %d. Created %d. Updated %d. Deleted %d." % (
            self.skip_count, self.create_count, self.update_count, self.delete_count)

    def upload_files(self, arg, dirname, names):
        # upload or skip items
        for item in names:
            file_path = os.path.join(dirname, item)
            if os.path.isdir(file_path):
                continue # Don't try to upload directories

            object_name = self.STATIC_URL + file_path.split(self.DIRECTORY)[1]
            self.local_object_names.append(object_name)

            try:
                properties = self.service.get_blob_properties(self.STATIC_CONTAINER,
                    object_name)
            except AzureMissingResourceHttpError:
                properties = {}
                self.create_count += 1

            cloud_datetime = None
            if 'last-modified' in properties:
                cloud_datetime = (properties['last-modified'] and
                                  datetime.datetime.strptime(
                                    properties['last-modified'],
                                    "%a, %d %b %Y %H:%M:%S %Z"
                                  ) or None)

            local_datetime = datetime.datetime.utcfromtimestamp(
                                               os.stat(file_path).st_mtime)

            if cloud_datetime and local_datetime < cloud_datetime:
                self.skip_count += 1
                if self.verbosity > 1:
                    print "Skipped %s: not modified." % object_name
                continue

            if not self.test_run:
                file_contents = open(file_path, 'rb').read()
                content_type, encoding = mimetypes.guess_type(file_path)
                self.service.put_blob(self.STATIC_CONTAINER, object_name, file_contents,
                    x_ms_blob_type='BlockBlob', x_ms_blob_content_type=content_type,
                    content_encoding=encoding)
                # sync_headers(cloud_obj)
            self.upload_count += 1
            if self.verbosity > 1:
                print "Uploaded", object_name

    def delete_files(self):
        # remove any objects in the container that don't exist locally
        for blob in self.service.list_blobs(self.STATIC_CONTAINER):
            if blob.name not in self.local_object_names:
                self.delete_count += 1
                if self.verbosity > 1:
                    print "Deleted %s" % blob.name
                if not self.test_run:
                    self.service.delete_blob(self.STATIC_CONTAINER, blob.name)
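# Hypothetical invocation, assuming this module is installed as a Django
# management command named syncstatic (the module's file name, which sets
# the command name, is not shown in the snippet):
#
#   python manage.py syncstatic --test-run
#   python manage.py syncstatic --wipe --container=my-static-container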
Example 33
import os
from flask import Flask, request, redirect, url_for
from werkzeug import secure_filename
from azure.storage.blob import BlobService
import string
import random


blob_service = BlobService(account_name='manthan', account_key='q6+oDMKpKUyYe4aWuICSYL+APQZlTvJzgChEq8py72F2aek6SV3wKAL7445Tw9t0FLdHF0LUXn/ja17w7kwCgQ==')
blob_service.create_container('reports', x_ms_blob_public_access='container')



UPLOAD_FOLDER = '/path/to/the/uploads'
ALLOWED_EXTENSIONS = set(['jpeg','png'])

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER


def allowed_file(filename):
    return '.' in filename and \
           filename.rsplit('.', 1)[1] in ALLOWED_EXTENSIONS

@app.route('/', methods=['GET', 'POST'])
def upload_file():
    if request.method == 'POST':
        file = request.files['file']
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            fileextension = filename.rsplit('.',1)[1]
Example 34
    cert_data = base64.b64encode(bfile.read()).decode()
    cert_format = 'pfx'
    cert_password = ''
    cert_res = sms.add_service_certificate(service_name=hosted_service_name,
                                           data=cert_data,
                                           certificate_format=cert_format,
                                           password=cert_password)
    operation_result = sms.get_operation_status(cert_res.request_id)

# Create a container
blob_service = BlobService(account_name=storage_acc_name,
                           account_key=storage_acc_key)

container_name = "vm-container"

result = blob_service.create_container(container_name)

container_url_template = "http://{}.blob.core.windows.net/{}"

container_url = container_url_template.format(storage_acc_name, container_name)

image_name = "b39f27a8b8c64d52b05eac6a62ebad85__Ubuntu-14_04-LTS-amd64-server-20140414-en-us-30GB"

blob_url = container_url + "/ubuntu.vhd"

os_hd = OSVirtualHardDisk(image_name, blob_url)

vm_name = name_generator()

linux_config = LinuxConfigurationSet(vm_name, 'rohan', 'qwerty12#', True)
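# The snippet stops here. A plausible next step with the legacy service
# management API (an assumption, not part of the original): deploy the VM
# from the OS disk and Linux configuration built above.
result = sms.create_virtual_machine_deployment(
    service_name=hosted_service_name,
    deployment_name=vm_name,
    deployment_slot='production',
    label=vm_name,
    role_name=vm_name,
    system_config=linux_config,
    os_virtual_hard_disk=os_hd,
    role_size='Small')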
Example 35
def initializeAzure():
        blob_service = BlobService(account_name='manthan', account_key='q6+oDMKpKUyYe4aWuICSYL+APQZlTvJzgChEq8py72F2aek6SV3wKAL7445Tw9t0FLdHF0LUXn/ja17w7kwCgQ==')
        print "blob init"
        blob_service.create_container('reports', x_ms_blob_public_access='container')
        print "blob did"
        return blob_service
Example 36
def main():
    service = BlobService(settings.STORAGE_ACCOUNT_NAME, settings.STORAGE_ACCOUNT_KEY)
    service.create_container(CONTAINER_NAME)

    process(service, LOCAL_BLOCK_BLOB_FILES, CONNECTION_COUNTS, is_page_blob=False)
    process(service, LOCAL_PAGE_BLOB_FILES, CONNECTION_COUNTS, is_page_blob=True)
Example 37
class AzureFS(LoggingMixIn, Operations):
    """
    Azure Blob Storage filesystem
    """

    blobs = None
    containers = dict()  # {cname: {stat:dict, files:None|{fname: stat}}
    fd = 0

    def __init__(self, account, key):
        self.blobs = BlobService(account, key)
        self._rebuild_container_list()

    def _rebuild_container_list(self):
        cmap = dict()
        cnames = set()

        for c in self.blobs.list_containers():
            cstat = make_stat(stat.S_IFDIR | 0755, c.properties)

            cname = c.name
            cmap['/' + cname] = dict(stat=cstat, files=None)
            cnames.add(cname)

        cmap['/'] = dict(files={}, stat=make_stat(stat.S_IFDIR | 0755))
        self.containers = cmap  # destroys fs tree cache resistant to misses

    @staticmethod
    def _parse_path(path):  # returns </dir, file(=None)>
        if path.count('/') > 1:  # file
            return str(path[:path.rfind('/')]), str(path[path.rfind('/') + 1:])
        else:  # dir
            pos = path.rfind('/', 1)
            if pos == -1:
                return path, None
            else:
                return str(path[:pos]), None

    @staticmethod
    def _parse_container(path):
        base_container = path[1:]  # /abc/def/g --> abc
        if base_container.find('/') > -1:
            base_container = base_container[:base_container.find('/')]
        return str(base_container)

    def _get_dir(self, path, contents_required=False, force=False):
        log.debug("get_dir: contents_required=%s, force=%s,"
                  " has_container=%s, path=%s",
                  "t" if contents_required else "f",
                  "t" if force else "f",
                  "t" if path in self.containers else "f",
                  path)
        cname = self._parse_container(path)

        if '/' + cname in self.containers and \
                self.containers['/' + cname].get('process') is not None:
            p = self.containers['/' + cname]['process']
            if not p.is_alive():
                p.join()
                self.containers['/' + cname]['process'] = None

        if not self.containers:
            log.info("get_dir: rebuilding container list")
            self._rebuild_container_list()

        if path in self.containers:
            container = self.containers[path]
            if not contents_required:
                return container
            if not force and container['files'] is not None:
                return container

        if '/' + cname not in self.containers:
            log.info("get_dir: no such container: /%s", cname)
            raise FuseOSError(errno.ENOENT)
        else:
            container = self.containers['/' + cname]
            try:
                log.info(">>>> %s - %s ",
                         cname,
                         container['process'])
            except KeyError:
                log.info(">>>> no process: %s " % cname)
            if container['files'] is None or force is True:
                # fetch contents of container
                log.info("Contents not found in the cache index: %s", cname)

                process = container.get('process', None)
                if process is not None and process.is_alive():
                    # We do nothing. Some thread is still working,
                    # getting list of blobs from the container.
                    log.info("Fetching blob list for '%s' is already"
                             " handled by %s", cname, process)
                else:
                    # No thread running for this container, launch a new one
                    m = Manager()
                    files = m.dict()
                    process = Process(target=get_files_from_blob_service,
                                      args=(self.blobs, cname, files),
                                      name="list-blobs/%s" % cname)
                    process.daemon = True
                    process.start()
                    container['process'] = process
                    log.info("Started blob list retrieval for '%s': %s",
                             cname, process)
                    container['files'] = files
            return container

    def _get_file(self, path):
        d, f = self._parse_path(path)
        log.debug("get_file: requested path=%s (d=%s, f=%s)", path, d, f)
        directory = self._get_dir(d, True)
        files = None
        if directory is not None:
            files = directory['files']
            if f in files:
                return files[f]

        if not hasattr(self, "_get_file_noent"):
            self._get_file_noent = {}

        last_check = self._get_file_noent.get(path, 0)
        if time.time() - last_check <= 30:
            # Negative TTL is 30 seconds (hardcoded for now)
            log.info("get_file: cache says to reply negative for %s", path)
            return None

        # Check if file now exists and our caches are just stale.
        try:
            c = self._parse_container(d)
            p = path[path.find('/', 1) + 1:]
            props = self.blobs.get_blob_properties(c, p)
            log.info("get_file: found locally unknown remote file %s: %s",
                     path, repr(props))

            node = make_stat(stat.S_IFREG | 0644, props)

            if node['st_size'] > 0:
                log.info("get_file: properties for %s: %s", path, repr(node))
                # Remember this, so we won't have to re-query it.
                files[f] = node
                if path in self._get_file_noent:
                    del self._get_file_noent[path]
                return node
            else:
                # TODO: FIXME: HACK: We currently ignore empty files.
                # Sometimes the file is not yet here and is still uploading.
                # Such files have "content-length: 0". Ignore those for now.
                log.warning("get_file: the file %s is not yet here (size=%s)",
                            path, node['st_size'])
                self._get_file_noent[path] = time.time()
                return None
        except AzureMissingResourceHttpError:
            log.info("get_file: remote confirms non-existence of %s", path)
            self._get_file_noent[path] = time.time()
            return None
        except AzureException as e:
            log.error("get_file: exception while querying remote for %s: %s",
                      path, repr(e))
            self._get_file_noent[path] = time.time()

        return None

    def getattr(self, path, fh=None):
        log.debug("getattr: path=%s", path)
        d, f = self._parse_path(path)

        if f is None:
            return self._get_dir(d)['stat']
        else:
            file_obj = self._get_file(path)
            if file_obj:
                return file_obj

        log.warning("getattr: no such file: %s", path)
        raise FuseOSError(errno.ENOENT)

    def mkdir(self, path, mode):
        if path.count('/') <= 1:  # create on root
            name = path[1:]
            if not 3 <= len(name) <= 63:
                log.error("Container names can be 3 through 63 chars long")
                raise FuseOSError(errno.ENAMETOOLONG)

            if not re.match(RE_CONTAINER_NAME, name):
                log.error("Invalid container name: '%s'", name)
                raise FuseOSError(errno.EACCES)

            resp = self.blobs.create_container(name)
            if resp:
                self._rebuild_container_list()
                log.info("CONTAINER %s CREATED", name)
            else:
                log.error("Invalid container name or container already exists")
                raise FuseOSError(errno.EACCES)
        else:
            # TODO: Support 2nd+ level directory creation
            raise FuseOSError(errno.ENOSYS)

    def rmdir(self, path):
        if path.count('/') == 1:
            c_name = path[1:]
            resp = self.blobs.delete_container(c_name)

            if resp:
                if path in self.containers:
                    del self.containers[path]
            else:
                raise FuseOSError(errno.EACCES)
        else:
            # TODO: Support 2nd+ level directories
            raise FuseOSError(errno.ENOSYS)

    def create(self, path, mode, fi=None):
        node = make_stat(stat.S_IFREG | mode)
        d, f = self._parse_path(path)

        if not f:
            log.error("Cannot create files on root level: /")
            raise FuseOSError(errno.ENOSYS)

        if f == ".__refresh_cache__":
            log.info("Refresh cache forced (%s)" % f)
            self._get_dir(path, True, True)
            return self.open(path, data='')

        directory = self._get_dir(d, True)
        if not directory:
            raise FuseOSError(errno.EIO)
        directory['files'][f] = node

        return self.open(path, data='')  # reusing handler provider

    def open(self, path, flags=0, data=None):
        log.info("open: path=%s; flags=%s", path, flags)
        if data is None:
            # Download contents
            c_name = self._parse_container(path)
            f_name = path[path.find('/', 1) + 1:]

            try:
                self.blobs.get_blob_metadata(c_name, f_name)
            except AzureMissingResourceHttpError:
                directory = self._get_dir('/' + c_name, True)
                if f_name in directory['files']:
                    del directory['files'][f_name]
                log.info("open: remote says there is no such file: c=%s f=%s",
                         c_name, f_name)
                raise FuseOSError(errno.ENOENT)
            except AzureHttpError as e:
                log.error("Read blob failed with HTTP %d", e.status_code)
                raise FuseOSError(errno.EAGAIN)
            except AzureException as e:
                log.exception("Read blob failed with exception: %s", repr(e))
                raise FuseOSError(errno.EAGAIN)
        self.fd += 1
        return self.fd

    def truncate(self, path, length, fh=None):
        return 0  # assume done, no need

    def write(self, path, data, offset, fh=None):
        # TODO: Re-implement writing
        raise FuseOSError(errno.EPERM)

    def unlink(self, path):
        c_name = self._parse_container(path)
        d, f = self._parse_path(path)

        try:
            self.blobs.delete_blob(c_name, f)

            _dir = self._get_dir(path, True)
            if _dir and f in _dir['files']:
                del _dir['files'][f]
            return 0
        except AzureMissingResourceHttpError:
            raise FuseOSError(errno.ENOENT)
        except:
            raise FuseOSError(errno.EAGAIN)

    def readdir(self, path, fh):
        if path == '/':
            return ['.', '..'] + [x[1:] for x in self.containers.keys()
                                  if x != '/']

        directory = self._get_dir(path, True)
        if not directory:
            log.info("readdir: no such file: %s", path)
            raise FuseOSError(errno.ENOENT)
        return ['.', '..'] + directory['files'].keys()

    def read(self, path, size, offset, fh):
        f_name = path[path.find('/', 1) + 1:]
        c_name = path[1:path.find('/', 1)]

        try:
            byte_range = "bytes=%s-%s" % (offset, offset + size - 1)
            log.debug("read range: %s", byte_range)
            data = self.blobs.get_blob(c_name, f_name, snapshot=None,
                                       x_ms_range=byte_range)
            return data
        except AzureHttpError as e:
            if e.status_code == 404:
                raise FuseOSError(errno.ENOENT)
            elif e.status_code == 403:
                raise FuseOSError(errno.EPERM)
            else:
                log.error("Read blob failed with HTTP %d", e.status_code)
                raise FuseOSError(errno.EAGAIN)

    def statfs(self, path):
        return dict(f_bsize=4096, f_blocks=1, f_bavail=sys.maxint)

    def rename(self, old, new):
        # TODO: Implement renaming
        raise FuseOSError(errno.ENOSYS)

    def symlink(self, target, source):
        raise FuseOSError(errno.ENOSYS)

    def getxattr(self, path, name, position=0):
        return ''

    def chmod(self, path, mode):
        pass

    def chown(self, path, uid, gid):
        pass
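
The read() method above boils down to a ranged GET against the blob service. A minimal standalone sketch of that pattern, assuming the same legacy azure.storage.blob.BlobService API and using placeholder credentials, container and blob names:

from azure.storage.blob import BlobService

blob_service = BlobService(account_name="<account_name>", account_key="<account_key>")

# Fetch bytes [offset, offset + size) of the blob, as read() does above
offset, size = 0, 4096
byte_range = "bytes=%d-%d" % (offset, offset + size - 1)
chunk = blob_service.get_blob("mycontainer", "myblob", x_ms_range=byte_range)
print(len(chunk))  # at most `size` bytes
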
Example no. 38
class AzureIOStore(IOStore):
    """
    A class that lets you get input from and send output to Azure Storage.
    
    """
    
    def __init__(self, account_name, container_name, name_prefix=""):
        """
        Make a new AzureIOStore that reads from and writes to the given
        container in the given account, adding the given prefix to keys. All
        paths will be interpreted as keys or key prefixes.
        
        If the name prefix does not end with a trailing slash, and is not empty,
        one will be added automatically.
        
        Account keys are retrieved from the AZURE_ACCOUNT_KEY environment
        variable or from the ~/.toilAzureCredentials file, as in Toil itself.
        
        """
        
        # Make sure azure libraries actually loaded
        assert(have_azure)
        
        self.account_name = account_name
        self.container_name = container_name
        self.name_prefix = name_prefix
        
        if self.name_prefix != "" and not self.name_prefix.endswith("/"):
            # Make sure it has the trailing slash required.
            self.name_prefix += "/"
        
        # Sneak into Toil and use the same keys it uses
        self.account_key = toil.jobStores.azureJobStore._fetchAzureAccountKey(
            self.account_name)
            
        # This will hold our Azure blob store connection
        self.connection = None
        
    def __getstate__(self):
        """
        Return the state to use for pickling. We don't want to try and pickle
        an open Azure connection.
        """
     
        return (self.account_name, self.account_key, self.container_name, 
            self.name_prefix)
        
    def __setstate__(self, state):
        """
        Set up after unpickling.
        """
        
        self.account_name = state[0]
        self.account_key = state[1]
        self.container_name = state[2]
        self.name_prefix = state[3]
        
        self.connection = None
        
    def __connect(self):
        """
        Make sure we have an Azure connection, and set one up if we don't.
        """
        
        if self.connection is None:
            RealTimeLogger.get().debug("Connecting to account {}, using "
                "container {} and prefix {}".format(self.account_name,
                self.container_name, self.name_prefix))
        
            # Connect to the blob service where we keep everything
            self.connection = BlobService(
                account_name=self.account_name, account_key=self.account_key)
            
    @backoff        
    def read_input_file(self, input_path, local_path):
        """
        Get input from Azure.
        """
        
        self.__connect()
        
        
        RealTimeLogger.get().debug("Loading {} from AzureIOStore".format(
            input_path))
        
        # Download the blob. This is known to be synchronous, although it can
        # call a callback during the process.
        self.connection.get_blob_to_path(self.container_name,
            self.name_prefix + input_path, local_path)
            
    def list_input_directory(self, input_path, recursive=False,
        with_times=False):
        """
        Loop over fake /-delimited directories on Azure. The prefix may or may
        not have a trailing slash; if not, one will be added automatically.
        
        Returns the names of files and fake directories in the given input fake
        directory, non-recursively.
        
        If with_times is specified, will yield (name, time) pairs including
        modification times as datetime objects. Times on directories are None.
        
        """
        
        self.__connect()
        
        RealTimeLogger.get().info("Enumerating {} from AzureIOStore".format(
            input_path))
        
        # Work out what the directory name to list is
        fake_directory = self.name_prefix + input_path
        
        if fake_directory != "" and not fake_directory.endswith("/"):
            # We have a nonempty prefix, and we need to end it with a slash
            fake_directory += "/"
        
        # This will hold the marker that we need to send back to get the next
        # page, if there is one. See <http://stackoverflow.com/a/24303682>
        marker = None
        
        # This holds the subdirectories we found; we yield each exactly once if
        # we aren't recursing.
        subdirectories = set()
        
        while True:
        
            # Get the results from Azure. We don't use delimiter since Azure
            # doesn't seem to provide the placeholder entries it's supposed to.
            
            result = self.connection.list_blobs(self.container_name,
                prefix=fake_directory, marker=marker)
                
            RealTimeLogger.get().info("Found {} files".format(len(result)))
                
            for blob in result:
                # Yield each result's blob name, but directory names only once
                
                # Drop the common prefix
                relative_path = blob.name[len(fake_directory):]
                
                if (not recursive) and "/" in relative_path:
                    # We found a file in a subdirectory, and we aren't supposed
                    # to be recursing.
                    subdirectory, _ = relative_path.split("/", 1)
                    
                    if subdirectory not in subdirectories:
                        # It's a new subdirectory. Yield and remember it
                        subdirectories.add(subdirectory)
                        
                        if with_times:
                            yield subdirectory, None
                        else:
                            yield subdirectory
                else:
                    # We found an actual file 
                    if with_times:
                        mtime = dateutil.parser.parse(
                            blob.properties.last_modified).replace(
                            tzinfo=dateutil.tz.tzutc())
                        yield relative_path, mtime
                            
                    else:
                        yield relative_path
                
            # Save the marker
            marker = result.next_marker
                
            if not marker:
                break
                
    @backoff
    def write_output_file(self, local_path, output_path):
        """
        Write output to Azure. Will create the container if necessary.
        """
        
        self.__connect()
        
        RealTimeLogger.get().debug("Saving {} to AzureIOStore".format(
            output_path))
        
        try:
            # Make the container
            self.connection.create_container(self.container_name)
        except azure.WindowsAzureConflictError:
            # The container probably already exists
            pass
        
        # Upload the blob (synchronously)
        # TODO: catch no container error here, make the container, and retry
        self.connection.put_block_blob_from_path(self.container_name,
            self.name_prefix + output_path, local_path)
    
    @backoff        
    def exists(self, path):
        """
        Returns true if the given input or output file exists in Azure already.
        
        """
        
        self.__connect()
        
        marker = None
        
        while True:
        
            try:
                # Make the container
                self.connection.create_container(self.container_name)
            except azure.WindowsAzureConflictError:
                # The container probably already exists
                pass
            
            # Get the results from Azure.
            result = self.connection.list_blobs(self.container_name, 
                prefix=self.name_prefix + path, marker=marker)
                
            for blob in result:
                # Look at each blob
                
                if blob.name == self.name_prefix + path:
                    # Found it
                    return True
                
            # Save the marker
            marker = result.next_marker
                
            if not marker:
                break 
        
        return False
        
        
    @backoff        
    def get_mtime(self, path):
        """
        Returns the modification time of the given blob if it exists, or None
        otherwise.
        
        """
        
        self.__connect()
        
        marker = None
        
        while True:
        
            # Get the results from Azure.
            result = self.connection.list_blobs(self.container_name, 
                prefix=self.name_prefix + path, marker=marker)
                
            for blob in result:
                # Look at each blob
                
                if blob.name == self.name_prefix + path:
                    # Found it
                    return dateutil.parser.parse(
                        blob.properties.last_modified).replace(
                        tzinfo=dateutil.tz.tzutc())
                
            # Save the marker
            marker = result.next_marker
                
            if not marker:
                break 
        
        return None
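
A minimal usage sketch for the AzureIOStore class above. The account and container names are placeholders, and the account key is assumed to be available via AZURE_ACCOUNT_KEY or ~/.toilAzureCredentials, as the constructor docstring describes:

store = AzureIOStore("myaccount", "mycontainer", name_prefix="experiment1")

# Round-trip a file through the store
store.write_output_file("results.txt", "run1/results.txt")
if store.exists("run1/results.txt"):
    store.read_input_file("run1/results.txt", "results_copy.txt")

# Enumerate the fake run1/ directory, with modification times
for name, mtime in store.list_input_directory("run1", with_times=True):
    print("%s %s" % (name, mtime))
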
Example no. 39

from azure.storage.blob import BlobService
import socket
import sys
 
azureStorageAccountName = "removed"
azureStorageAccountKey = "removed"
container = "ilbcp1"
blob = "currentprimary.dat"
retryCount = 0
while 1:
    # keep main thread running
    try:
        print ("Started.")
        currentHost = socket.gethostname()
        print ("Setting as primary...")
        blob_service = BlobService(account_name=azureStorageAccountName, account_key=azureStorageAccountKey)
        newContents = currentHost
        blob_service.create_container(container)
        blob_service.put_block_blob_from_text(container, blob, newContents)
        print ("Done.")
        sys.exit()
    except Exception as e:
        print("Exception!") # e ?
        retryCount = retryCount + 1
        if retryCount>5:
            print ("Permanently failed.")
            sys.exit()
Example no. 40
from azure.storage.blob import BlobService


blob_service = BlobService(account_name="<account_name>", account_key="<account_key>")

blob_service.create_container("datacontainer")

blob_service.create_container("datacontainer", x_ms_blob_public_access="container")

blob_service.set_container_acl("datacontainer", x_ms_blob_public_access="container")


blob_service.put_block_blob_from_path(
    "datacontainer", "datablob", "StorageClientPy.py", x_ms_blob_content_type="text/x-script.phyton"
)


blobs = []
marker = None
while True:
    batch = blob_service.list_blobs("datacontainer", marker=marker)
    blobs.extend(batch)
    if not batch.next_marker:
        break
    marker = batch.next_marker
for blob in blobs:
    print(blob.name)


blob_service.get_blob_to_path("datacontainer", "datablob", "out-StorageClientPy.py")
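
Because the container above was given public 'container' access, its blobs can also be fetched anonymously over plain HTTP. A small sketch of that, assuming the requests library is installed and reusing the placeholder account name:

import requests

url = "https://<account_name>.blob.core.windows.net/datacontainer/datablob"
response = requests.get(url)
print(response.status_code)  # 200 once the ACL is in effect
print(response.text[:80])    # start of the uploaded script
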
Example no. 41
import time

from azure.storage.blob import BlobService

while True:
    try:
        # Read the 1-Wire sensor file; the reading is on the second line
        f = open('/sys/bus/w1/devices/28-031561b266ff/w1_slave', 'r')
        lines = list(f)
        f.close()  # close the sensor file each iteration

        # Extract the digits of the reading and insert the decimal point
        temperature = list(lines[1])[29:]
        del temperature[len(temperature) - 1]
        temperature.insert(len(temperature) - 3, '.')

        # Convert the list of characters back into a string
        tempAsFloat = "".join(temperature)
        print tempAsFloat

        # Send the temperature to the Azure cloud. account_name and
        # account_key are the storage account's name and key.
        blob_service = BlobService(account_name='*', account_key='*')
        # Create a container named 'temperature'
        blob_service.create_container('temperature')
        # Make the container publicly readable
        blob_service.set_container_acl('temperature', x_ms_blob_public_access='container')
        # Upload the reading as a block blob, also named 'temperature'
        blob_service.put_blob('temperature', 'temperature', tempAsFloat, 'BlockBlob')
        time.sleep(10)  # update the temperature in Azure every 10 seconds
    except Exception:
        pass  # ignore transient sensor/upload errors and keep looping
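
A companion sketch for the loop above: reading the latest published temperature back out of the public 'temperature' container. The starred credentials are placeholders, exactly as in the writer:

from azure.storage.blob import BlobService

blob_service = BlobService(account_name='*', account_key='*')
# The writer stores the current reading in a blob that is also named 'temperature'
latest = blob_service.get_blob_to_text('temperature', 'temperature')
print("Latest temperature: " + latest)
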
Example no. 42
class AzureJobStore(AbstractJobStore):
    """
    A job store that uses Azure's blob store for file storage and Table Service to store job info
    with strong consistency.
    """

    # Dots in container names should be avoided because container names are used in HTTPS bucket
    # URLs where they may interfere with the certificate common name. We use a double underscore
    # as a separator instead.
    #
    containerNameRe = re.compile(r'^[a-z0-9](-?[a-z0-9]+)+[a-z0-9]$')

    # See https://msdn.microsoft.com/en-us/library/azure/dd135715.aspx
    #
    minContainerNameLen = 3
    maxContainerNameLen = 63
    maxNameLen = 10
    nameSeparator = 'xx'  # Table names must be alphanumeric
    # Length of a jobID - used to test if a stats file has been read already or not
    jobIDLength = len(str(uuid.uuid4()))

    def __init__(self, locator, jobChunkSize=maxAzureTablePropertySize):
        super(AzureJobStore, self).__init__()
        accountName, namePrefix = locator.split(':', 1)
        if '--' in namePrefix:
            raise ValueError(
                "Invalid name prefix '%s'. Name prefixes may not contain '--'."
                % namePrefix)
        if not self.containerNameRe.match(namePrefix):
            raise ValueError(
                "Invalid name prefix '%s'. Name prefixes must contain only digits, "
                "hyphens or lower-case letters and must not start or end in a "
                "hyphen." % namePrefix)
        # Reserve room for the name separator and the longest name suffix
        if len(namePrefix) > self.maxContainerNameLen - self.maxNameLen - len(
                self.nameSeparator):
            raise ValueError((
                "Invalid name prefix '%s'. Name prefixes may not be longer than 50 "
                "characters." % namePrefix))
        self.locator = locator
        self.jobChunkSize = jobChunkSize
        self.accountKey = _fetchAzureAccountKey(accountName)
        self.accountName = accountName
        # Table names have strict requirements in Azure
        self.namePrefix = self._sanitizeTableName(namePrefix)
        # These are the main API entry points.
        self.tableService = TableService(account_key=self.accountKey,
                                         account_name=accountName)
        self.blobService = BlobService(account_key=self.accountKey,
                                       account_name=accountName)
        # Serialized jobs table
        self.jobItems = None
        # Job<->file mapping table
        self.jobFileIDs = None
        # Container for all shared and unshared files
        self.files = None
        # Stats and logging strings
        self.statsFiles = None
        # File IDs that contain stats and logging strings
        self.statsFileIDs = None

    @property
    def keyPath(self):
        return self.config.cseKey

    def initialize(self, config):
        if self._jobStoreExists():
            raise JobStoreExistsException(self.locator)
        logger.debug("Creating job store at '%s'" % self.locator)
        self._bind(create=True)
        super(AzureJobStore, self).initialize(config)

    def resume(self):
        if not self._jobStoreExists():
            raise NoSuchJobStoreException(self.locator)
        logger.debug("Using existing job store at '%s'" % self.locator)
        self._bind(create=False)
        super(AzureJobStore, self).resume()

    def destroy(self):
        for name in 'jobItems', 'jobFileIDs', 'files', 'statsFiles', 'statsFileIDs':
            resource = getattr(self, name)
            if resource is not None:
                if isinstance(resource, AzureTable):
                    resource.delete_table()
                elif isinstance(resource, AzureBlobContainer):
                    resource.delete_container()
                else:
                    assert False
                setattr(self, name, None)

    def _jobStoreExists(self):
        """
        Checks if job store exists by querying the existence of the statsFileIDs table. Note that
        this is the last component that is deleted in :meth:`.destroy`.
        """
        for attempt in retry_azure():
            with attempt:
                try:
                    table = self.tableService.query_tables(
                        table_name=self._qualify('statsFileIDs'))
                except AzureMissingResourceHttpError as e:
                    if e.status_code == 404:
                        return False
                    else:
                        raise
                else:
                    return table is not None

    def _bind(self, create=False):
        table = self._bindTable
        container = self._bindContainer
        for name, binder in (('jobItems', table), ('jobFileIDs', table),
                             ('files', container), ('statsFiles', container),
                             ('statsFileIDs', table)):
            if getattr(self, name) is None:
                setattr(self, name, binder(self._qualify(name), create=create))

    def _qualify(self, name):
        return self.namePrefix + self.nameSeparator + name.lower()

    def jobs(self):

        # How many jobs have we done?
        total_processed = 0

        for jobEntity in self.jobItems.query_entities_auto():
            # Process the items in the page
            yield AzureJob.fromEntity(jobEntity)
            total_processed += 1

            if total_processed % 1000 == 0:
                # Produce some feedback for the user, because this can take
                # a long time on, for example, Azure
                logger.info("Processed %d total jobs" % total_processed)

        logger.info("Processed %d total jobs" % total_processed)

    def create(self,
               command,
               memory,
               cores,
               disk,
               preemptable,
               predecessorNumber=0):
        jobStoreID = self._newJobID()
        job = AzureJob(jobStoreID=jobStoreID,
                       command=command,
                       memory=memory,
                       cores=cores,
                       disk=disk,
                       preemptable=preemptable,
                       remainingRetryCount=self._defaultTryCount(),
                       logJobStoreFileID=None,
                       predecessorNumber=predecessorNumber)
        entity = job.toItem(chunkSize=self.jobChunkSize)
        entity['RowKey'] = jobStoreID
        self.jobItems.insert_entity(entity=entity)
        return job

    def exists(self, jobStoreID):
        if self.jobItems.get_entity(row_key=jobStoreID) is None:
            return False
        return True

    def load(self, jobStoreID):
        jobEntity = self.jobItems.get_entity(row_key=jobStoreID)
        if jobEntity is None:
            raise NoSuchJobException(jobStoreID)
        return AzureJob.fromEntity(jobEntity)

    def update(self, job):
        self.jobItems.update_entity(
            row_key=job.jobStoreID,
            entity=job.toItem(chunkSize=self.jobChunkSize))

    def delete(self, jobStoreID):
        try:
            self.jobItems.delete_entity(row_key=jobStoreID)
        except AzureMissingResourceHttpError:
            # Job deletion is idempotent, and this job has been deleted already
            return
        filterString = "PartitionKey eq '%s'" % jobStoreID
        for fileEntity in self.jobFileIDs.query_entities(filter=filterString):
            jobStoreFileID = fileEntity.RowKey
            self.deleteFile(jobStoreFileID)

    def getEnv(self):
        return dict(AZURE_ACCOUNT_KEY=self.accountKey)

    class BlobInfo(namedtuple('BlobInfo', ('account', 'container', 'name'))):
        @property
        @memoize
        def service(self):
            return BlobService(account_name=self.account,
                               account_key=_fetchAzureAccountKey(self.account))

    @classmethod
    def _readFromUrl(cls, url, writable):
        blob = cls._parseWasbUrl(url)
        blob.service.get_blob_to_file(container_name=blob.container,
                                      blob_name=blob.name,
                                      stream=writable)

    @classmethod
    def _writeToUrl(cls, readable, url):
        blob = cls._parseWasbUrl(url)
        blob.service.put_block_blob_from_file(container_name=blob.container,
                                              blob_name=blob.name,
                                              stream=readable)

    @classmethod
    def _parseWasbUrl(cls, url):
        """
        :param urlparse.ParseResult url: x
        :rtype: AzureJobStore.BlobInfo
        """
        assert url.scheme in ('wasb', 'wasbs')
        try:
            container, account = url.netloc.split('@')
        except ValueError:
            raise InvalidImportExportUrlException(url)
        suffix = '.blob.core.windows.net'
        if account.endswith(suffix):
            account = account[:-len(suffix)]
        else:
            raise InvalidImportExportUrlException(url)
        assert url.path[0] == '/'
        return cls.BlobInfo(account=account,
                            container=container,
                            name=url.path[1:])

    @classmethod
    def _supportsUrl(cls, url, export=False):
        return url.scheme.lower() in ('wasb', 'wasbs')

    def writeFile(self, localFilePath, jobStoreID=None):
        jobStoreFileID = self._newFileID()
        self.updateFile(jobStoreFileID, localFilePath)
        self._associateFileWithJob(jobStoreFileID, jobStoreID)
        return jobStoreFileID

    def updateFile(self, jobStoreFileID, localFilePath):
        with open(localFilePath) as read_fd:
            with self._uploadStream(jobStoreFileID, self.files) as write_fd:
                while True:
                    buf = read_fd.read(self._maxAzureBlockBytes)
                    write_fd.write(buf)
                    if len(buf) == 0:
                        break

    def readFile(self, jobStoreFileID, localFilePath):
        try:
            with self._downloadStream(jobStoreFileID, self.files) as read_fd:
                with open(localFilePath, 'w') as write_fd:
                    while True:
                        buf = read_fd.read(self._maxAzureBlockBytes)
                        write_fd.write(buf)
                        if not buf:
                            break
        except AzureMissingResourceHttpError:
            raise NoSuchFileException(jobStoreFileID)

    def deleteFile(self, jobStoreFileID):
        try:
            self.files.delete_blob(blob_name=jobStoreFileID)
            self._dissociateFileFromJob(jobStoreFileID)
        except AzureMissingResourceHttpError:
            pass

    def fileExists(self, jobStoreFileID):
        # As Azure doesn't have a blob_exists method (at least in the
        # python API) we just try to download the metadata, and hope
        # the metadata is small so the call will be fast.
        try:
            self.files.get_blob_metadata(blob_name=jobStoreFileID)
            return True
        except AzureMissingResourceHttpError:
            return False

    @contextmanager
    def writeFileStream(self, jobStoreID=None):
        # TODO: this (and all stream methods) should probably use the
        # Append Blob type, but that is not currently supported by the
        # Azure Python API.
        jobStoreFileID = self._newFileID()
        with self._uploadStream(jobStoreFileID, self.files) as fd:
            yield fd, jobStoreFileID
        self._associateFileWithJob(jobStoreFileID, jobStoreID)

    @contextmanager
    def updateFileStream(self, jobStoreFileID):
        with self._uploadStream(jobStoreFileID,
                                self.files,
                                checkForModification=True) as fd:
            yield fd

    def getEmptyFileStoreID(self, jobStoreID=None):
        jobStoreFileID = self._newFileID()
        self.files.put_blob(blob_name=jobStoreFileID,
                            blob='',
                            x_ms_blob_type='BlockBlob')
        self._associateFileWithJob(jobStoreFileID, jobStoreID)
        return jobStoreFileID

    @contextmanager
    def readFileStream(self, jobStoreFileID):
        if not self.fileExists(jobStoreFileID):
            raise NoSuchFileException(jobStoreFileID)
        with self._downloadStream(jobStoreFileID, self.files) as fd:
            yield fd

    @contextmanager
    def writeSharedFileStream(self, sharedFileName, isProtected=None):
        assert self._validateSharedFileName(sharedFileName)
        sharedFileID = self._newFileID(sharedFileName)
        with self._uploadStream(sharedFileID,
                                self.files,
                                encrypted=isProtected) as fd:
            yield fd

    @contextmanager
    def readSharedFileStream(self, sharedFileName):
        assert self._validateSharedFileName(sharedFileName)
        sharedFileID = self._newFileID(sharedFileName)
        if not self.fileExists(sharedFileID):
            raise NoSuchFileException(sharedFileID)
        with self._downloadStream(sharedFileID, self.files) as fd:
            yield fd

    def writeStatsAndLogging(self, statsAndLoggingString):
        # TODO: would be a great use case for the append blobs, once implemented in the Azure SDK
        jobStoreFileID = self._newFileID()
        encrypted = self.keyPath is not None
        if encrypted:
            statsAndLoggingString = encryption.encrypt(statsAndLoggingString,
                                                       self.keyPath)
        self.statsFiles.put_block_blob_from_text(
            blob_name=jobStoreFileID,
            text=statsAndLoggingString,
            x_ms_meta_name_values=dict(encrypted=str(encrypted)))
        self.statsFileIDs.insert_entity(entity={'RowKey': jobStoreFileID})

    def readStatsAndLogging(self, callback, readAll=False):
        suffix = '_old'
        numStatsFiles = 0
        for entity in self.statsFileIDs.query_entities():
            jobStoreFileID = entity.RowKey
            hasBeenRead = len(jobStoreFileID) > self.jobIDLength
            if not hasBeenRead:
                with self._downloadStream(jobStoreFileID,
                                          self.statsFiles) as fd:
                    callback(fd)
                # Mark this entity as read by appending the suffix
                self.statsFileIDs.insert_entity(
                    entity={'RowKey': jobStoreFileID + suffix})
                self.statsFileIDs.delete_entity(row_key=jobStoreFileID)
                numStatsFiles += 1
            elif readAll:
                # Strip the suffix to get the original ID
                jobStoreFileID = jobStoreFileID[:-len(suffix)]
                with self._downloadStream(jobStoreFileID,
                                          self.statsFiles) as fd:
                    callback(fd)
                numStatsFiles += 1
        return numStatsFiles

    _azureTimeFormat = "%Y-%m-%dT%H:%M:%SZ"

    def getPublicUrl(self, jobStoreFileID):
        try:
            self.files.get_blob_properties(blob_name=jobStoreFileID)
        except AzureMissingResourceHttpError:
            raise NoSuchFileException(jobStoreFileID)
        # Compensate for a little bit of clock skew
        startTimeStr = (datetime.utcnow() - timedelta(minutes=5)).strftime(
            self._azureTimeFormat)
        endTime = datetime.utcnow() + self.publicUrlExpiration
        endTimeStr = endTime.strftime(self._azureTimeFormat)
        sap = SharedAccessPolicy(
            AccessPolicy(startTimeStr, endTimeStr,
                         BlobSharedAccessPermissions.READ))
        sas_token = self.files.generate_shared_access_signature(
            blob_name=jobStoreFileID, shared_access_policy=sap)
        return self.files.make_blob_url(
            blob_name=jobStoreFileID) + '?' + sas_token

    def getSharedPublicUrl(self, sharedFileName):
        jobStoreFileID = self._newFileID(sharedFileName)
        return self.getPublicUrl(jobStoreFileID)

    def _newJobID(self):
        # raw UUIDs don't work for Azure property names because the '-' character is disallowed.
        return str(uuid.uuid4()).replace('-', '_')

    # A dummy job ID under which all shared files are stored.
    sharedFileJobID = uuid.UUID('891f7db6-e4d9-4221-a58e-ab6cc4395f94')

    def _newFileID(self, sharedFileName=None):
        if sharedFileName is None:
            ret = str(uuid.uuid4())
        else:
            ret = str(uuid.uuid5(self.sharedFileJobID, str(sharedFileName)))
        return ret.replace('-', '_')

    def _associateFileWithJob(self, jobStoreFileID, jobStoreID=None):
        if jobStoreID is not None:
            self.jobFileIDs.insert_entity(entity={
                'PartitionKey': jobStoreID,
                'RowKey': jobStoreFileID
            })

    def _dissociateFileFromJob(self, jobStoreFileID):
        entities = self.jobFileIDs.query_entities(filter="RowKey eq '%s'" %
                                                  jobStoreFileID)
        if entities:
            assert len(entities) == 1
            jobStoreID = entities[0].PartitionKey
            self.jobFileIDs.delete_entity(partition_key=jobStoreID,
                                          row_key=jobStoreFileID)

    def _bindTable(self, tableName, create=False):
        for attempt in retry_azure():
            with attempt:
                try:
                    tables = self.tableService.query_tables(
                        table_name=tableName)
                except AzureMissingResourceHttpError as e:
                    if e.status_code != 404:
                        raise
                else:
                    if tables:
                        assert tables[0].name == tableName
                        return AzureTable(self.tableService, tableName)
                if create:
                    self.tableService.create_table(tableName)
                    return AzureTable(self.tableService, tableName)
                else:
                    return None

    def _bindContainer(self, containerName, create=False):
        for attempt in retry_azure():
            with attempt:
                try:
                    self.blobService.get_container_properties(containerName)
                except AzureMissingResourceHttpError as e:
                    if e.status_code == 404:
                        if create:
                            self.blobService.create_container(containerName)
                        else:
                            return None
                    else:
                        raise
        return AzureBlobContainer(self.blobService, containerName)

    def _sanitizeTableName(self, tableName):
        """
        Azure table names must start with a letter and be alphanumeric.

        This will never cause a collision if uuids are used, but
        otherwise may not be safe.
        """
        return 'a' + filter(lambda x: x.isalnum(), tableName)

    # Maximum bytes that can be in any block of an Azure block blob
    # https://github.com/Azure/azure-storage-python/blob/4c7666e05a9556c10154508335738ee44d7cb104/azure/storage/blob/blobservice.py#L106
    _maxAzureBlockBytes = 4 * 1024 * 1024

    @contextmanager
    def _uploadStream(self,
                      jobStoreFileID,
                      container,
                      checkForModification=False,
                      encrypted=None):
        """
        :param encrypted: True to enforce encryption (will raise exception unless key is set),
        False to prevent encryption or None to encrypt if key is set.
        """
        if checkForModification:
            try:
                expectedVersion = container.get_blob_properties(
                    blob_name=jobStoreFileID)['etag']
            except AzureMissingResourceHttpError:
                expectedVersion = None

        if encrypted is None:
            encrypted = self.keyPath is not None
        elif encrypted:
            if self.keyPath is None:
                raise RuntimeError(
                    'Encryption requested but no key was provided')

        maxBlockSize = self._maxAzureBlockBytes
        if encrypted:
            # There is a small overhead for encrypted data.
            maxBlockSize -= encryption.overhead

        store = self

        class UploadPipe(WritablePipe):
            def readFrom(self, readable):
                blockIDs = []
                try:
                    while True:
                        buf = readable.read(maxBlockSize)
                        if len(buf) == 0:
                            # We're safe to break here even if we never read anything, since
                            # putting an empty block list creates an empty blob.
                            break
                        if encrypted:
                            buf = encryption.encrypt(buf, store.keyPath)
                        blockID = store._newFileID()
                        container.put_block(blob_name=jobStoreFileID,
                                            block=buf,
                                            blockid=blockID)
                        blockIDs.append(blockID)
                except:
                    with panic(log=logger):
                        # This is guaranteed to delete any uncommitted blocks.
                        container.delete_blob(blob_name=jobStoreFileID)

                if checkForModification and expectedVersion is not None:
                    # Acquire a (60-second) write lock,
                    leaseID = container.lease_blob(
                        blob_name=jobStoreFileID,
                        x_ms_lease_action='acquire')['x-ms-lease-id']
                    # check for modification,
                    blobProperties = container.get_blob_properties(
                        blob_name=jobStoreFileID)
                    if blobProperties['etag'] != expectedVersion:
                        container.lease_blob(blob_name=jobStoreFileID,
                                             x_ms_lease_action='release',
                                             x_ms_lease_id=leaseID)
                        raise ConcurrentFileModificationException(
                            jobStoreFileID)
                    # commit the file,
                    container.put_block_list(
                        blob_name=jobStoreFileID,
                        block_list=blockIDs,
                        x_ms_lease_id=leaseID,
                        x_ms_meta_name_values=dict(encrypted=str(encrypted)))
                    # then release the lock.
                    container.lease_blob(blob_name=jobStoreFileID,
                                         x_ms_lease_action='release',
                                         x_ms_lease_id=leaseID)
                else:
                    # No need to check for modification, just blindly write over whatever
                    # was there.
                    container.put_block_list(
                        blob_name=jobStoreFileID,
                        block_list=blockIDs,
                        x_ms_meta_name_values=dict(encrypted=str(encrypted)))

        with UploadPipe() as writable:
            yield writable

    @contextmanager
    def _downloadStream(self, jobStoreFileID, container):
        # The reason this is not in the writer is so we catch non-existent blobs early

        blobProps = container.get_blob_properties(blob_name=jobStoreFileID)

        encrypted = strict_bool(blobProps['x-ms-meta-encrypted'])
        if encrypted and self.keyPath is None:
            raise AssertionError(
                'Content is encrypted but no key was provided.')

        outer_self = self

        class DownloadPipe(ReadablePipe):
            def writeTo(self, writable):
                chunkStart = 0
                fileSize = int(blobProps['Content-Length'])
                while chunkStart < fileSize:
                    chunkEnd = chunkStart + outer_self._maxAzureBlockBytes - 1
                    buf = container.get_blob(blob_name=jobStoreFileID,
                                             x_ms_range="bytes=%d-%d" %
                                             (chunkStart, chunkEnd))
                    if encrypted:
                        buf = encryption.decrypt(buf, outer_self.keyPath)
                    writable.write(buf)
                    chunkStart = chunkEnd + 1

        with DownloadPipe() as readable:
            yield readable
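
The heart of _uploadStream above is the two-phase block-blob protocol: stage chunks with put_block, then commit them atomically with put_block_list. A minimal sketch of that protocol on its own, assuming the same legacy BlobService API; the names are placeholders and the 4 MiB chunk size mirrors _maxAzureBlockBytes:

import uuid

from azure.storage.blob import BlobService

blob_service = BlobService(account_name="<account_name>", account_key="<account_key>")

block_ids = []
with open("bigfile.bin", "rb") as f:
    while True:
        chunk = f.read(4 * 1024 * 1024)  # stay within the per-block limit
        if not chunk:
            break
        # Block IDs must be unique within the blob; UUIDs with '-' replaced
        # work, just as in _newFileID() above
        block_id = str(uuid.uuid4()).replace('-', '_')
        blob_service.put_block("mycontainer", "bigblob", chunk, block_id)
        block_ids.append(block_id)

# Nothing is readable until the block list is committed, which also makes
# the upload appear atomic to readers
blob_service.put_block_list("mycontainer", "bigblob", block_ids)
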
Example no. 43
import os
import sys

from azure.storage.blob import BlobService

# if there is no video key, return
if len(sys.argv) <= 2:
    exit()

print "=== Uploading to Azure ==="

video_key = sys.argv[2]

# walk all files in dir and push to bucket
key = raw_input("Please enter azure videopath blob storage key: ")
blob_service = BlobService(account_name='videopathmobilefiles',
                           account_key=key)
basepath = os.path.dirname(os.path.abspath(__file__)) + "/" + output_folder
container_name = video_key.lower()
blob_service.create_container(container_name,
                              x_ms_blob_public_access='container')

# collect files for uploading
filepaths = []
for path, subdirs, files in os.walk(basepath):
    for name in files:
        # don't upload hidden files
        if name[0] == ".":
            continue
        pathname = os.path.join(path, name)
        filepaths.append(pathname)


# uploading thread
def upload_files(filepaths):
    while len(filepaths):
Example no. 44
def module_impl(rm, log, params, check_mode=False):

    if not HAS_AZURE:
        raise Exception("The Azure python sdk is not installed (try 'pip install azure')")

    if not HAS_REQUESTS:
        raise Exception("The requests python module is not installed (try 'pip install requests')")

    resource_group = params.get('resource_group')
    account_name = params.get('account_name')
    container_name = params.get('container_name')
    mode = params.get('mode')
    x_ms_meta_name_values = params.get('x_ms_meta_name_values')
    x_ms_blob_public_access = params.get('x_ms_blob_public_access')
    x_ms_blob_cache_control = params.get('x_ms_blob_cache_control')
    x_ms_blob_content_encoding = params.get('x_ms_blob_content_encoding')
    x_ms_blob_content_language = params.get('x_ms_blob_content_language')
    x_ms_blob_content_type = params.get('x_ms_blob_content_type')
    prefix = params.get('prefix')
    marker = params.get('marker')
    max_results = params.get('max_results')
    blob_name = params.get('blob_name')
    file_path = params.get('file_path')
    overwrite = params.get('overwrite')
    permissions = params.get('permissions')
    hours = params.get('hours')
    days = params.get('days')
    access_token = params.get('access_token')

    results = dict(changed=False)

    storage_client = rm.storage_client
    
    if not resource_group:
        raise Exception("Parameter error: resource_group cannot be None.")
    
    if not account_name:
        raise Exception("Parameter error: account_name cannot be None.")

    if not container_name:
        raise Exception("Parameter error: container_name cannot be None.")

    if not NAME_PATTERN.match(container_name):
        raise Exception("Parameter error: container_name must consist of lowercase letters, numbers and hyphens. It must begin with " +
            "a letter or number. It may not contain two consecutive hyphens.")

    # add file path validation

    results['account_name'] = account_name
    results['resource_group'] = resource_group 
    results['container_name'] = container_name

    # Supported modes: create, update, delete (container); put, get, delete_blob (blob);
    # list (list blobs); get_url (download url); get_token (shared access signature)
    try:
        log('Getting keys')
        keys = {}
        response = storage_client.storage_accounts.list_keys(resource_group, account_name)
        keys[KeyName.key1] = response.storage_account_keys.key1
        keys[KeyName.key2] = response.storage_account_keys.key2
    except AzureHttpError as e:
        log('Error getting keys for account %s' % account_name)
        raise Exception(str(e.message))

    try:
        log('Create blob service')
        bs = BlobService(account_name, keys[KeyName.key1])
    except Exception as e:
        log('Error creating blob service.')
        raise Exception(str(e.args[0]))

    if mode == 'create':
        container = get_container_facts(bs, container_name)
        if container is not None:
            # container exists
            results['container'] = container
            results['msg'] = "Container already exists."
            return results
        # create the container
        if not check_mode:
            log('Create container %s' % container_name)
            bs.create_container(container_name, x_ms_meta_name_values, x_ms_blob_public_access)
            results['container'] = get_container_facts(bs, container_name)
        results['msg'] = "Container created successfully."
        results['changed'] = True
        return results

    if mode == 'update':
        container = get_container_facts(bs, container_name)
        if container is None:
            # container does not exist
            if not check_mode:
                log('Create container %s' % container_name)
                bs.create_container(container_name, x_ms_meta_name_values, x_ms_blob_public_access)
            results['changed'] = True
            results['msg'] = 'Container created successfully.'
            return results     
        # update existing container
        results['msg'] = "Container not changed."
        if x_ms_meta_name_values:
            if not check_mode:
                log('Update x_ms_meta_name_values for container %s' % container_name)
                bs.set_container_metadata(container_name, x_ms_meta_name_values)
            results['changed'] = True
            results['msg'] = 'Container meta data updated successfully.'
        if x_ms_blob_public_access:
            access = x_ms_blob_public_access
            if x_ms_blob_public_access == 'private':
                access = None
            if not check_mode:
                log('Set access to %s for container %s' % (access, container_name))
                bs.set_container_acl(container_name=container_name, x_ms_blob_public_access=access)
            results['changed'] = True
            results['msg'] = 'Container ACL updated successfully.'
        if permissions:
            if hours == 0 and days == 0:
                raise Exception("Parameter error: expecting hours > 0 or days > 0")
            id = "%s-%s" % (container_name, permissions) 
            si = get_identifier(id, hours, days, permissions)
            identifiers = SignedIdentifiers()
            identifiers.signed_identifiers.append(si)
            if not check_mode:
                log('Set permissions to %s for container %s' % (permissions, container_name))
                bs.set_container_acl(container_name=container_name,signed_identifiers=identifiers)
            results['changed'] = True
            results['msg'] = 'Container ACL updated successfully.'
        results['container'] = get_container_facts(bs, container_name)
        return results

    if mode == 'delete':
        container = get_container_facts(bs, container_name)
        if container is None:
            results['msg'] = "Container %s could not be found." % container_name
            return results
        if not check_mode:
            log('Deleting container %s' % container_name)
            bs.delete_container(container_name)
        results['changed'] = True
        results['msg'] = 'Container deleted successfully.'
        return results

    if mode == 'delete_blob':
        if blob_name is None:
            raise Exception("Parameter error: blob_name cannot be None.")
        
        container = container_check(bs, container_name)
        blob = get_blob_facts(bs, container_name, blob_name)

        if not blob:
            results['msg'] = 'Blob %s could not be found in container %s.' % (blob_name, container_name)
            return results

        if not check_mode:
            log('Deleting %s from container %s.' % (blob_name, container_name))
            bs.delete_blob(container_name, blob_name)
        results['changed'] = True
        results['msg'] = 'Blob successfully deleted.'
        return results

    if mode == 'put':
        if not blob_name:
            raise Exception("Parameter error: blob_name cannot be None.")

        if not file_path :
            raise Exception("Parameter error: file_path cannot be None.")

        if not path_check(file_path):
            raise Exception("File %s does not exist." % file_path)

        container = get_container_facts(bs, container_name)
        blob = None
        if container is not None:
            blob = get_blob_facts(bs, container_name, blob_name)

        if container is not None and blob is not None:
            # both container and blob already exist
            md5_remote = blob['content-md5']
            md5_local = get_md5(file_path)
            results['container'] = container
            results['blob'] = blob

            if md5_local == md5_remote:
                sum_matches = True
                results['msg'] = 'File checksums match. File not uploaded.'
                if overwrite == 'always':
                    if not check_mode:
                        log('Uploading %s to container %s.' % (file_path, container_name))
                        put_block_blob(
                            bs,
                            container_name,
                            blob_name,
                            file_path,
                            x_ms_meta_name_values,
                            x_ms_blob_cache_control,
                            x_ms_blob_content_encoding,
                            x_ms_blob_content_language,
                            x_ms_blob_content_type
                        )
                        results['blob'] = get_blob_facts(bs, container_name, blob_name)
                    results['changed'] = True
                    results['msg'] = 'File successfully uploaded.'
            else:
                sum_matches = False
                if overwrite in ('always', 'different'):
                    if not check_mode:
                        log('Uploading %s to container %s.' % (file_path, container_name))
                        put_block_blob(
                            bs,
                            container_name,
                            blob_name,
                            file_path,
                            x_ms_meta_name_values,
                            x_ms_blob_cache_control,
                            x_ms_blob_content_encoding,
                            x_ms_blob_content_language,
                            x_ms_blob_content_type
                        )
                        results['blob'] = get_blob_facts(bs, container_name, blob_name)
                    results['changed'] = True
                    results['msg'] = 'File successfully uploaded.'
                else:
                    results['msg'] = "WARNING: Checksums do not match. Use overwrite parameter to force upload."
            return results

        if container is None:
            # container does not exist. create container and upload.
            if not check_mode:
                log('Creating container %s.' % container_name)
                bs.create_container(container_name, x_ms_meta_name_values, x_ms_blob_public_access)
                log('Uploading %s to container %s.' % (file_path, container_name))
                put_block_blob(
                    bs,
                    container_name,
                    blob_name,
                    file_path,
                    x_ms_meta_name_values,
                    x_ms_blob_cache_control,
                    x_ms_blob_content_encoding,
                    x_ms_blob_content_language,
                    x_ms_blob_content_type
                )
                results['container'] = get_container_facts(bs, container_name)
                results['blob'] = get_blob_facts(bs, container_name, blob_name)
            results['changed'] = True
            results['msg'] = 'Successfully created container and uploaded file.'
            return results

        if container is not None:
            # container exists. just upload.
            if not check_mode:
                log('Uploading %s to container %s.' % (file_path, container_name))
                put_block_blob(
                    bs,
                    container_name,
                    blob_name,
                    file_path,
                    x_ms_meta_name_values,
                    x_ms_blob_cache_control,
                    x_ms_blob_content_encoding,
                    x_ms_blob_content_language,
                    x_ms_blob_content_type
                )
                results['blob'] = get_blob_facts(bs, container_name, blob_name)
            results['changed'] = True
            results['msg'] = 'Successfully uploaded file.'
            return results

    if mode == 'list':
        container = container_check(bs, container_name)
        response = bs.list_blobs(
            container_name,
            prefix,
            marker,
            max_results
        )
        results['blobs'] = []
        for blob in response.blobs:
            b = dict(
                name = blob.name,
                snapshot = blob.snapshot,
                last_modified = blob.properties.last_modified,
                content_length = blob.properties.content_length,
                blob_type = blob.properties.blob_type,
            )
            results['blobs'].append(b)
        return results

    if mode == 'get':
        if file_path is None:
            raise Exception("Parameter error: file_path cannot be None.")
        
        container = container_check(bs, container_name)
        blob = blob_check(bs, container_name, blob_name)
        path_exists = path_check(file_path)
        
        if not path_exists or overwrite == 'always':
            if not check_mode:
                bs.get_blob_to_path(container_name, blob_name, file_path)
            results['changed'] = True
            results['msg'] = "Blob %s successfully downloaded to %s." % (blob_name, file_path)
            return results

        if path_exists:
            md5_remote = blob['content-md5']
            md5_local = get_md5(file_path)

            if md5_local == md5_remote:
                sum_matches = True
                if overwrite == 'always':
                    if not check_mode:
                        bs.get_blob_to_path(container_name, blob_name, file_path)
                    results['changed'] = True
                    results['msg'] = "Blob %s successfully downloaded to %s." % (blob_name, file_path)
                else:
                    results['msg'] = "Local and remote object are identical, ignoring. Use overwrite parameter to force."
            else:
                sum_matches = False
                if overwrite in ('always', 'different'):
                    if not check_mode:
                        bs.get_blob_to_path(container_name, blob_name, file_path)
                    results['changed'] = True
                    results['msg'] = "Blob %s successfully downloaded to %s." % (blob_name, file_path)
                else:
                    results['msg'] ="WARNING: Checksums do not match. Use overwrite parameter to force download."
        
        if sum_matches is True and overwrite == 'never':
            results['msg'] = "Local and remote object are identical, ignoring. Use overwrite parameter to force."
        
        return results

    if mode == 'get_url':
        if not blob_name:
            raise Exception("Parameter error: blob_name cannot be None.")

        container = container_check(bs, container_name)
        blob = blob_check(bs, container_name, blob_name)

        url = bs.make_blob_url(
            container_name=container_name,
            blob_name=blob_name,
            sas_token=access_token)
        results['url'] = url
        results['msg'] = "Url: %s" % url
        return results

    if mode == 'get_token':
        if hours == 0 and days == 0:
            raise Exception("Parameter error: expecting hours > 0 or days > 0")
        container = container_check(bs, container_name)
        blob = blob_check(bs, container_name, blob_name)
        results['blob_name'] = blob_name
        sap = get_shared_access_policy(permissions, hours=hours, days=days)
        token = bs.generate_shared_access_signature(container_name, blob_name, sap)
        results['access_token'] = token
        return results
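
The get_token mode above reduces to signing a SharedAccessPolicy and handing the resulting token to make_blob_url. A hedged sketch of that flow, assuming the legacy 0.x SDK's import paths for AccessPolicy and SharedAccessPolicy and using placeholder names:

from datetime import datetime, timedelta

from azure.storage import AccessPolicy, SharedAccessPolicy
from azure.storage.blob import BlobService

bs = BlobService(account_name="<account_name>", account_key="<account_key>")

# Azure expects ISO-8601 times; start a little in the past to absorb clock skew
time_format = "%Y-%m-%dT%H:%M:%SZ"
start = (datetime.utcnow() - timedelta(minutes=5)).strftime(time_format)
expiry = (datetime.utcnow() + timedelta(hours=1)).strftime(time_format)

# 'r' grants read-only access for the lifetime of the policy
sap = SharedAccessPolicy(AccessPolicy(start, expiry, 'r'))
token = bs.generate_shared_access_signature("datacontainer", "datablob", sap)
print(bs.make_blob_url("datacontainer", "datablob", sas_token=token))
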
Example no. 45
class AzureJobStore(AbstractJobStore):
    """
    A job store that uses Azure's blob store for file storage and
    Table Service to store job info with strong consistency."""

    @classmethod
    def loadOrCreateJobStore(cls, jobStoreString, config=None, **kwargs):
        account, namePrefix = jobStoreString.split(':', 1)
        if '--' in namePrefix:
            raise ValueError("Invalid name prefix '%s'. Name prefixes may not contain "
                             "'--'." % namePrefix)

        if not cls.containerNameRe.match(namePrefix):
            raise ValueError("Invalid name prefix '%s'. Name prefixes must contain only digits, "
                             "hyphens or lower-case letters and must not start or end in a "
                             "hyphen." % namePrefix)

        # Reserve 13 for separator and suffix
        if len(namePrefix) > cls.maxContainerNameLen - cls.maxNameLen - len(cls.nameSeparator):
            raise ValueError(("Invalid name prefix '%s'. Name prefixes may not be longer than 50 "
                              "characters." % namePrefix))

        return cls(account, namePrefix, config=config, **kwargs)

    # Dots in container names should be avoided because container names are used in HTTPS bucket
    # URLs where the may interfere with the certificate common name. We use a double
    # underscore as a separator instead.
    #
    containerNameRe = re.compile(r'^[a-z0-9](-?[a-z0-9]+)+[a-z0-9]$')

    # See https://msdn.microsoft.com/en-us/library/azure/dd135715.aspx
    #
    minContainerNameLen = 3
    maxContainerNameLen = 63
    maxNameLen = 10
    nameSeparator = 'xx'  # Table names must be alphanumeric

    # Do not invoke the constructor, use the factory method above.

    def __init__(self, accountName, namePrefix, config=None,
                 jobChunkSize=maxAzureTablePropertySize):
        self.jobChunkSize = jobChunkSize
        self.keyPath = None

        self.account_key = _fetchAzureAccountKey(accountName)
        self.accountName = accountName
        # Table names have strict requirements in Azure
        self.namePrefix = self._sanitizeTableName(namePrefix)
        logger.debug("Creating job store with name prefix '%s'" % self.namePrefix)

        # These are the main API entrypoints.
        self.tableService = TableService(account_key=self.account_key, account_name=accountName)
        self.blobService = BlobService(account_key=self.account_key, account_name=accountName)

        # Register our job-store in the global table for this storage account
        self.registryTable = self._getOrCreateTable('toilRegistry')
        exists = self.registryTable.get_entity(row_key=self.namePrefix)
        self._checkJobStoreCreation(config is not None, exists, accountName + ":" + self.namePrefix)
        self.registryTable.insert_or_replace_entity(row_key=self.namePrefix,
                                                    entity={'exists': True})

        # Serialized jobs table
        self.jobItems = self._getOrCreateTable(self.qualify('jobs'))
        # Job<->file mapping table
        self.jobFileIDs = self._getOrCreateTable(self.qualify('jobFileIDs'))

        # Container for all shared and unshared files
        self.files = self._getOrCreateBlobContainer(self.qualify('files'))

        # Stats and logging strings
        self.statsFiles = self._getOrCreateBlobContainer(self.qualify('statsfiles'))
        # File IDs that contain stats and logging strings
        self.statsFileIDs = self._getOrCreateTable(self.qualify('statsFileIDs'))

        super(AzureJobStore, self).__init__(config=config)

        if self.config.cseKey is not None:
            self.keyPath = self.config.cseKey

    # Length of a jobID - used to test if a stats file has been read already or not
    jobIDLength = len(str(uuid.uuid4()))

    def qualify(self, name):
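        # e.g. with namePrefix 'myprefix', qualify('jobs') returns 'myprefixxxjobs'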
        return self.namePrefix + self.nameSeparator + name

    def jobs(self):

        # How many jobs have we done?
        total_processed = 0

        for jobEntity in self.jobItems.query_entities_auto():
            # Process the items in the page
            yield AzureJob.fromEntity(jobEntity)
            total_processed += 1

            if total_processed % 1000 == 0:
                # Produce some feedback for the user, because this can take
                # a long time on, for example, Azure
                logger.info("Processed %d total jobs" % total_processed)

        logger.info("Processed %d total jobs" % total_processed)

    def create(self, command, memory, cores, disk, preemptable, predecessorNumber=0):
        jobStoreID = self._newJobID()
        job = AzureJob(jobStoreID=jobStoreID, command=command,
                       memory=memory, cores=cores, disk=disk, preemptable=preemptable,
                       remainingRetryCount=self._defaultTryCount(), logJobStoreFileID=None,
                       predecessorNumber=predecessorNumber)
        entity = job.toItem(chunkSize=self.jobChunkSize)
        entity['RowKey'] = jobStoreID
        self.jobItems.insert_entity(entity=entity)
        return job

    def exists(self, jobStoreID):
        return self.jobItems.get_entity(row_key=jobStoreID) is not None

    def load(self, jobStoreID):
        jobEntity = self.jobItems.get_entity(row_key=jobStoreID)
        if jobEntity is None:
            raise NoSuchJobException(jobStoreID)
        return AzureJob.fromEntity(jobEntity)

    def update(self, job):
        self.jobItems.update_entity(row_key=job.jobStoreID,
                                    entity=job.toItem(chunkSize=self.jobChunkSize))

    def delete(self, jobStoreID):
        try:
            self.jobItems.delete_entity(row_key=jobStoreID)
        except AzureMissingResourceHttpError:
            # Job deletion is idempotent, and this job has been deleted already
            return
        filterString = "PartitionKey eq '%s'" % jobStoreID
        for fileEntity in self.jobFileIDs.query_entities(filter=filterString):
            jobStoreFileID = fileEntity.RowKey
            self.deleteFile(jobStoreFileID)

    def deleteJobStore(self):
        self.registryTable.delete_entity(row_key=self.namePrefix)
        self.jobItems.delete_table()
        self.jobFileIDs.delete_table()
        self.files.delete_container()
        self.statsFiles.delete_container()
        self.statsFileIDs.delete_table()

    def getEnv(self):
        return dict(AZURE_ACCOUNT_KEY=self.account_key)

    @classmethod
    def _readFromUrl(cls, url, writable):
        blobService, containerName, blobName = cls._extractBlobInfoFromUrl(url)
        blobService.get_blob_to_file(containerName, blobName, writable)

    @classmethod
    def _writeToUrl(cls, readable, url):
        blobService, containerName, blobName = cls._extractBlobInfoFromUrl(url)
        blobService.put_block_blob_from_file(containerName, blobName, readable)
        # Read the blob back as a sanity check that the upload went through.
        blobService.get_blob(containerName, blobName)

    @staticmethod
    def _extractBlobInfoFromUrl(url):
        """
        :return: (blobService, containerName, blobName)
        """

        def invalidUrl():
            raise RuntimeError("The URL '%s' is invalid" % url.geturl())

        netloc = url.netloc.split('@')
        if len(netloc) != 2:
            invalidUrl()

        accountEnd = netloc[1].find('.blob.core.windows.net')
        if accountEnd == -1:
            invalidUrl()

        containerName, accountName = netloc[0], netloc[1][0:accountEnd]
        blobName = url.path[1:]  # urlparse always includes a leading '/'
        blobService = BlobService(account_key=_fetchAzureAccountKey(accountName),
                                  account_name=accountName)
        return blobService, containerName, blobName

    @classmethod
    def _supportsUrl(cls, url, export=False):
        return url.scheme.lower() in ('wasb', 'wasbs')

    def writeFile(self, localFilePath, jobStoreID=None):
        jobStoreFileID = self._newFileID()
        self.updateFile(jobStoreFileID, localFilePath)
        self._associateFileWithJob(jobStoreFileID, jobStoreID)
        return jobStoreFileID

    def updateFile(self, jobStoreFileID, localFilePath):
        with open(localFilePath, 'rb') as read_fd:
            with self._uploadStream(jobStoreFileID, self.files) as write_fd:
                while True:
                    buf = read_fd.read(self._maxAzureBlockBytes)
                    if not buf:
                        break
                    write_fd.write(buf)

    def readFile(self, jobStoreFileID, localFilePath):
        try:
            with self._downloadStream(jobStoreFileID, self.files) as read_fd:
                with open(localFilePath, 'wb') as write_fd:
                    while True:
                        buf = read_fd.read(self._maxAzureBlockBytes)
                        if not buf:
                            break
                        write_fd.write(buf)
        except AzureMissingResourceHttpError:
            raise NoSuchFileException(jobStoreFileID)

    def deleteFile(self, jobStoreFileID):
        try:
            self.files.delete_blob(blob_name=jobStoreFileID)
            self._dissociateFileFromJob(jobStoreFileID)
        except AzureMissingResourceHttpError:
            pass

    def fileExists(self, jobStoreFileID):
        # As Azure doesn't have a blob_exists method (at least in the Python
        # API), we just fetch the blob's metadata and hope that it is small
        # enough for the call to be fast.
        try:
            self.files.get_blob_metadata(blob_name=jobStoreFileID)
            return True
        except AzureMissingResourceHttpError:
            return False

    @contextmanager
    def writeFileStream(self, jobStoreID=None):
        # TODO: this (and all stream methods) should probably use the
        # Append Blob type, but that is not currently supported by the
        # Azure Python API.
        jobStoreFileID = self._newFileID()
        with self._uploadStream(jobStoreFileID, self.files) as fd:
            yield fd, jobStoreFileID
        self._associateFileWithJob(jobStoreFileID, jobStoreID)

    @contextmanager
    def updateFileStream(self, jobStoreFileID):
        with self._uploadStream(jobStoreFileID, self.files, checkForModification=True) as fd:
            yield fd

    def getEmptyFileStoreID(self, jobStoreID=None):
        jobStoreFileID = self._newFileID()
        self.files.put_blob(blob_name=jobStoreFileID, blob='',
                            x_ms_blob_type='BlockBlob')
        self._associateFileWithJob(jobStoreFileID, jobStoreID)
        return jobStoreFileID

    @contextmanager
    def readFileStream(self, jobStoreFileID):
        if not self.fileExists(jobStoreFileID):
            raise NoSuchFileException(jobStoreFileID)
        with self._downloadStream(jobStoreFileID, self.files) as fd:
            yield fd

    @contextmanager
    def writeSharedFileStream(self, sharedFileName, isProtected=None):
        assert self._validateSharedFileName(sharedFileName)
        sharedFileID = self._newFileID(sharedFileName)
        with self._uploadStream(sharedFileID, self.files, encrypted=isProtected) as fd:
            yield fd

    @contextmanager
    def readSharedFileStream(self, sharedFileName):
        assert self._validateSharedFileName(sharedFileName)
        sharedFileID = self._newFileID(sharedFileName)
        if not self.fileExists(sharedFileID):
            raise NoSuchFileException(sharedFileID)
        with self._downloadStream(sharedFileID, self.files) as fd:
            yield fd

    def writeStatsAndLogging(self, statsAndLoggingString):
        # TODO: would be a great use case for the append blobs, once implemented in the Azure SDK
        jobStoreFileID = self._newFileID()
        encrypted = self.keyPath is not None
        if encrypted:
            statsAndLoggingString = encryption.encrypt(statsAndLoggingString, self.keyPath)
        self.statsFiles.put_block_blob_from_text(blob_name=jobStoreFileID,
                                                 text=statsAndLoggingString,
                                                 x_ms_meta_name_values=dict(
                                                     encrypted=str(encrypted)))
        self.statsFileIDs.insert_entity(entity={'RowKey': jobStoreFileID})

    def readStatsAndLogging(self, callback, readAll=False):
        suffix = '_old'
        numStatsFiles = 0
        for entity in self.statsFileIDs.query_entities():
            jobStoreFileID = entity.RowKey
            hasBeenRead = len(jobStoreFileID) > self.jobIDLength
            if not hasBeenRead:
                with self._downloadStream(jobStoreFileID, self.statsFiles) as fd:
                    callback(fd)
                # Mark this entity as read by appending the suffix
                self.statsFileIDs.insert_entity(entity={'RowKey': jobStoreFileID + suffix})
                self.statsFileIDs.delete_entity(row_key=jobStoreFileID)
                numStatsFiles += 1
            elif readAll:
                # Strip the suffix to get the original ID
                jobStoreFileID = jobStoreFileID[:-len(suffix)]
                with self._downloadStream(jobStoreFileID, self.statsFiles) as fd:
                    callback(fd)
                numStatsFiles += 1
        return numStatsFiles

    _azureTimeFormat = "%Y-%m-%dT%H:%M:%SZ"
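    # e.g. datetime(2016, 1, 1, 12, 0, 0) formats as '2016-01-01T12:00:00Z'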

    def getPublicUrl(self, jobStoreFileID):
        try:
            self.files.get_blob_properties(blob_name=jobStoreFileID)
        except AzureMissingResourceHttpError:
            raise NoSuchFileException(jobStoreFileID)
        # Compensate for a little bit of clock skew
        startTimeStr = (datetime.utcnow() - timedelta(minutes=5)).strftime(self._azureTimeFormat)
        endTime = datetime.utcnow() + self.publicUrlExpiration
        endTimeStr = endTime.strftime(self._azureTimeFormat)
        sap = SharedAccessPolicy(AccessPolicy(startTimeStr, endTimeStr,
                                              BlobSharedAccessPermissions.READ))
        sas_token = self.files.generate_shared_access_signature(blob_name=jobStoreFileID,
                                                                shared_access_policy=sap)
        return self.files.make_blob_url(blob_name=jobStoreFileID) + '?' + sas_token

    def getSharedPublicUrl(self, sharedFileName):
        jobStoreFileID = self._newFileID(sharedFileName)
        return self.getPublicUrl(jobStoreFileID)

    def _newJobID(self):
        # raw UUIDs don't work for Azure property names because the '-' character is disallowed.
        return str(uuid.uuid4()).replace('-', '_')

    # A dummy job ID under which all shared files are stored.
    sharedFileJobID = uuid.UUID('891f7db6-e4d9-4221-a58e-ab6cc4395f94')

    def _newFileID(self, sharedFileName=None):
        if sharedFileName is None:
            ret = str(uuid.uuid4())
        else:
            ret = str(uuid.uuid5(self.sharedFileJobID, str(sharedFileName)))
        return ret.replace('-', '_')

    def _associateFileWithJob(self, jobStoreFileID, jobStoreID=None):
        if jobStoreID is not None:
            self.jobFileIDs.insert_entity(entity={'PartitionKey': jobStoreID,
                                                  'RowKey': jobStoreFileID})

    def _dissociateFileFromJob(self, jobStoreFileID):
        entities = self.jobFileIDs.query_entities(filter="RowKey eq '%s'" % jobStoreFileID)
        if entities:
            assert len(entities) == 1
            jobStoreID = entities[0].PartitionKey
            self.jobFileIDs.delete_entity(partition_key=jobStoreID, row_key=jobStoreFileID)

    def _getOrCreateTable(self, tableName):
        # This will not fail if the table already exists.
        for attempt in retry_azure():
            with attempt:
                self.tableService.create_table(tableName)
        return AzureTable(self.tableService, tableName)

    def _getOrCreateBlobContainer(self, containerName):
        for attempt in retry_azure():
            with attempt:
                self.blobService.create_container(containerName)
        return AzureBlobContainer(self.blobService, containerName)

    def _sanitizeTableName(self, tableName):
        """
        Azure table names must start with a letter and be alphanumeric.

        This will never cause a collision if uuids are used, but
        otherwise may not be safe.
        """
        return 'a' + filter(lambda x: x.isalnum(), tableName)

    # Maximum bytes that can be in any block of an Azure block blob
    # https://github.com/Azure/azure-storage-python/blob/4c7666e05a9556c10154508335738ee44d7cb104/azure/storage/blob/blobservice.py#L106
    _maxAzureBlockBytes = 4 * 1024 * 1024

    @contextmanager
    def _uploadStream(self, jobStoreFileID, container, checkForModification=False, encrypted=None):
        """
        :param encrypted: True to enforce encryption (will raise exception unless key is set),
        False to prevent encryption or None to encrypt if key is set.
        """
        if checkForModification:
            try:
                expectedVersion = container.get_blob_properties(blob_name=jobStoreFileID)['etag']
            except AzureMissingResourceHttpError:
                expectedVersion = None

        if encrypted is None:
            encrypted = self.keyPath is not None
        elif encrypted:
            if self.keyPath is None:
                raise RuntimeError('Encryption requested but no key was provided')

        maxBlockSize = self._maxAzureBlockBytes
        if encrypted:
            # There is a small overhead for encrypted data.
            maxBlockSize -= encryption.overhead
        readable_fh, writable_fh = os.pipe()
        with os.fdopen(readable_fh, 'r') as readable:
            with os.fdopen(writable_fh, 'w') as writable:
                def reader():
                    blockIDs = []
                    try:
                        while True:
                            buf = readable.read(maxBlockSize)
                            if len(buf) == 0:
                                # We're safe to break here even if we never read anything, since
                                # putting an empty block list creates an empty blob.
                                break
                            if encrypted:
                                buf = encryption.encrypt(buf, self.keyPath)
                            blockID = self._newFileID()
                            container.put_block(blob_name=jobStoreFileID,
                                                block=buf,
                                                blockid=blockID)
                            blockIDs.append(blockID)
                    except:
                        # This is guaranteed to delete any uncommitted
                        # blocks.
                        container.delete_blob(blob_name=jobStoreFileID)
                        raise

                    if checkForModification and expectedVersion is not None:
                        # Acquire a (60-second) write lock,
                        leaseID = container.lease_blob(blob_name=jobStoreFileID,
                                                       x_ms_lease_action='acquire')['x-ms-lease-id']
                        # check for modification,
                        blobProperties = container.get_blob_properties(blob_name=jobStoreFileID)
                        if blobProperties['etag'] != expectedVersion:
                            container.lease_blob(blob_name=jobStoreFileID,
                                                 x_ms_lease_action='release',
                                                 x_ms_lease_id=leaseID)
                            raise ConcurrentFileModificationException(jobStoreFileID)
                        # commit the file,
                        container.put_block_list(blob_name=jobStoreFileID,
                                                 block_list=blockIDs,
                                                 x_ms_lease_id=leaseID,
                                                 x_ms_meta_name_values=dict(
                                                     encrypted=str(encrypted)))
                        # then release the lock.
                        container.lease_blob(blob_name=jobStoreFileID,
                                             x_ms_lease_action='release',
                                             x_ms_lease_id=leaseID)
                    else:
                        # No need to check for modification, just blindly write over whatever
                        # was there.
                        container.put_block_list(blob_name=jobStoreFileID,
                                                 block_list=blockIDs,
                                                 x_ms_meta_name_values=dict(
                                                     encrypted=str(encrypted)))

                thread = ExceptionalThread(target=reader)
                thread.start()
                yield writable
            # The writable is now closed. This will send EOF to the readable and cause that
            # thread to finish.
            thread.join()

    @contextmanager
    def _downloadStream(self, jobStoreFileID, container):
        # The reason this is not in the writer is so that we catch non-existent blobs early

        blobProps = container.get_blob_properties(blob_name=jobStoreFileID)

        encrypted = strict_bool(blobProps['x-ms-meta-encrypted'])
        if encrypted and self.keyPath is None:
            raise AssertionError('Content is encrypted but no key was provided.')

        readable_fh, writable_fh = os.pipe()
        with os.fdopen(readable_fh, 'r') as readable:
            with os.fdopen(writable_fh, 'w') as writable:
                def writer():
                    try:
                        chunkStartPos = 0
                        fileSize = int(blobProps['Content-Length'])
                        while chunkStartPos < fileSize:
                            chunkEndPos = chunkStartPos + self._maxAzureBlockBytes - 1
                            buf = container.get_blob(blob_name=jobStoreFileID,
                                                     x_ms_range="bytes=%d-%d" % (chunkStartPos,
                                                                                 chunkEndPos))
                            if encrypted:
                                buf = encryption.decrypt(buf, self.keyPath)
                            writable.write(buf)
                            chunkStartPos = chunkEndPos + 1
                    finally:
                        # Ensure readers aren't left blocking if this thread crashes.
                        # This close() will send EOF to the reading end and ultimately cause the
                        # yield to return. It also makes the implicit .close() done by the
                        # enclosing "with" context redundant, but that should be OK since
                        # .close() on file objects is idempotent.
                        writable.close()

                thread = ExceptionalThread(target=writer)
                thread.start()
                yield readable
                thread.join()
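
A minimal usage sketch for the job store above (the account name and prefix are placeholders, and `config` stands for the Toil Config object whose presence signals creation of a new store):

    store = AzureJobStore.loadOrCreateJobStore('myaccount:my-prefix', config=config)
    job = store.create(command='echo hello', memory=2 ** 30, cores=1,
                       disk=2 ** 30, preemptable=False)
    assert store.exists(job.jobStoreID)
    store.delete(job.jobStoreID)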
Example no. 46
0
class AzureFS(LoggingMixIn, Operations):
    """Azure Blob Storage filesystem"""

    blobs = None
    containers = dict()  # <cname, dict(stat: dict, files: None | dict<fname, stat>)>
    fds = dict()  # <fd, (path, bytes, dirty)>
    fd = 0

    def __init__(self, account, key):
        self.blobs = BlobService(account, key)
        self.rebuild_container_list()

    def convert_to_epoch(self, date):
        """Converts Tue, 31 Jul 2012 07:17:34 GMT format to epoch"""
        return int(time.mktime(time.strptime(date, TIME_FORMAT)))

    def rebuild_container_list(self):
        cmap = dict()
        cnames = set()
        for c in self.blobs.list_containers():
            date = c.properties.last_modified
            cstat = dict(st_mode=(S_IFDIR | 0755), st_uid=getuid(), st_size=0,
                         st_mtime=self.convert_to_epoch(date))
            cname = c.name
            cmap['/' + cname] = dict(stat=cstat, files=None)
            cnames.add(cname)

        cmap['/'] = dict(files={},
                         stat=dict(st_mode=(S_IFDIR | 0755),
                                     st_uid=getuid(), st_size=0,
                                     st_mtime=int(time.time())))

        self.containers = cmap   # destroys fs tree cache resistant to misses

    def _parse_path(self, path):    # returns </dir, file(=None)>
        if path.count('/') > 1:     # file
            return str(path[:path.rfind('/')]), str(path[path.rfind('/') + 1:])
        else:                       # dir
            pos = path.rfind('/', 1)
            if pos == -1:
                return path, None
            else:
                return str(path[:pos]), None

    def parse_container(self, path):
        base_container = path[1:]   # /abc/def/g --> abc
        if base_container.find('/') > -1:
            base_container = base_container[:base_container.find('/')]
        return str(base_container)

    def _get_dir(self, path, contents_required=False):
        if not self.containers:
            self.rebuild_container_list()

        if path in self.containers and not (contents_required and
                                            self.containers[path]['files'] is None):
            return self.containers[path]

        cname = self.parse_container(path)

        if '/' + cname not in self.containers:
            raise FuseOSError(ENOENT)
        else:
            if self.containers['/' + cname]['files'] is None:
                # fetch contents of container
                log.info("------> CONTENTS NOT FOUND: %s" % cname)

                blobs = self.blobs.list_blobs(cname)

                dirstat = dict(st_mode=(S_IFDIR | 0755), st_size=0,
                               st_uid=getuid(), st_mtime=time.time())

                self.containers['/' + cname]['files'] = dict()

                for f in blobs:
                    blob_name = f.name
                    blob_date = f.properties.last_modified
                    blob_size = long(f.properties.content_length)

                    node = dict(st_mode=(S_IFREG | 0644), st_size=blob_size,
                                st_mtime=self.convert_to_epoch(blob_date),
                                st_uid=getuid())

                    if blob_name.find('/') == -1:  # file just under container
                        self.containers['/' + cname]['files'][blob_name] = node

            return self.containers['/' + cname]

    def _get_file(self, path):
        d, f = self._parse_path(path)
        dir = self._get_dir(d, True)
        if dir is not None and f in dir['files']:
            return dir['files'][f]

    def getattr(self, path, fh=None):
        d, f = self._parse_path(path)

        if f is None:
            dir = self._get_dir(d)
            return dir['stat']
        else:
            file = self._get_file(path)

            if file:
                return file

        raise FuseOSError(ENOENT)

    # FUSE
    def mkdir(self, path, mode):
        if path.count('/') <= 1:    # create on root
            name = path[1:]

            if not 3 <= len(name) <= 63:
                log.error("Container names can be 3 through 63 chars long.")
                raise FuseOSError(ENAMETOOLONG)
            if name != name.lower():
                log.error("Container names cannot contain uppercase characters.")
                raise FuseOSError(EACCES)
            if name.count('--') > 0:
                log.error("Container names cannot contain consecutive dashes (-).")
                raise FuseOSError(EAGAIN)
            #TODO handle all "-"s must be preceded by letter or numbers
            #TODO starts with only letter or number, can contain letter, nr,'-'

            resp = self.blobs.create_container(name)

            if resp:
                self.rebuild_container_list()
                log.info("CONTAINER %s CREATED" % name)
            else:
                log.error("Invalid container name or container already exists.")
                raise FuseOSError(EACCES)
        else:
            raise FuseOSError(ENOSYS)  # TODO support 2nd+ level mkdirs

    def rmdir(self, path):
        if path.count('/') == 1:
            c_name = path[1:]
            resp = self.blobs.delete_container(c_name)

            if resp:
                if path in self.containers:
                    del self.containers[path]
            else:
                raise FuseOSError(EACCES)
        else:
            raise FuseOSError(ENOSYS)  # TODO support 2nd+ level rmdirs

    def create(self, path, mode):
        node = dict(st_mode=(S_IFREG | mode), st_size=0, st_nlink=1,
                     st_uid=getuid(), st_mtime=time.time())
        d, f = self._parse_path(path)

        if not f:
            log.error("Cannot create files on root level: /")
            raise FuseOSError(ENOSYS)

        dir = self._get_dir(d, True)
        if not dir:
            raise FuseOSError(EIO)
        dir['files'][f] = node

        return self.open(path, data='')     # reusing handler provider

    def open(self, path, flags=0, data=None):
        if data is None:                    # download contents
            c_name = self.parse_container(path)
            f_name = path[path.find('/', 1) + 1:]

            try:
                data = self.blobs.get_blob(c_name, f_name)
            except AzureMissingResourceHttpError:
                dir = self._get_dir('/' + c_name, True)
                if f_name in dir['files']:
                    del dir['files'][f_name]
                raise FuseOSError(ENOENT)
            except AzureException as e:
                log.error("Read blob failed HTTP %d" % e.code)
                raise FuseOSError(EAGAIN)

        self.fd += 1
        self.fds[self.fd] = (path, data, False)

        return self.fd

    def flush(self, path, fh=None):
        if not fh:
            raise FuseOSError(EIO)
        else:
            if fh not in self.fds:
                raise FuseOSError(EIO)
            path = self.fds[fh][0]
            data = self.fds[fh][1]
            dirty = self.fds[fh][2]

            if not dirty:
                return 0     # avoid redundant write

            d, f = self._parse_path(path)
            c_name = self.parse_container(path)

            if data is None:
                data = ''

            try:
                if len(data) < 64 * 1024 * 1024:   # 64 mb
                    self.blobs.put_blob(c_name, f, data, 'BlockBlob')
                else:
                    # divide file by blocks and upload
                    block_size = 8 * 1024 * 1024
                    num_blocks = int(math.ceil(len(data) * 1.0 / block_size))
                    rd = str(random.randint(1, 1e8))
                    block_ids = list()

                    for i in range(num_blocks):
                        part = data[i * block_size:min((i + 1) * block_size,
                            len(data))]
                        block_id = base64.encodestring('%s_%s' % (rd,
                            (8 - len(str(i))) * '0' + str(i)))
                        self.blobs.put_block(c_name, f, part, block_id)
                        block_ids.append(block_id)

                    self.blobs.put_block_list(c_name, f, block_ids)
            except AzureException:
                raise FuseOSError(EAGAIN)

            dir = self._get_dir(d, True)
            if not dir or f not in dir['files']:
                raise FuseOSError(EIO)

            # update local data
            dir['files'][f]['st_size'] = len(data)
            dir['files'][f]['st_mtime'] = time.time()
            self.fds[fh] = (path, data, False)  # mark as not dirty
            return 0

    def release(self, path, fh=None):
        if fh is not None and fh in self.fds:
            del self.fds[fh]

    def truncate(self, path, length, fh=None):
        return 0     # assume done, no need

    def write(self, path, data, offset, fh=None):
        if not fh or fh not in self.fds:
            raise FuseOSError(ENOENT)
        else:
            d = self.fds[fh][1]
            if d is None:
                d = ""
            self.fds[fh] = (self.fds[fh][0], d[:offset] + data, True)
            return len(data)

    def unlink(self, path):
        c_name = self.parse_container(path)
        d, f = self._parse_path(path)

        try:
            self.blobs.delete_blob(c_name, f)

            _dir = self._get_dir(path, True)
            if _dir and f in _dir['files']:
                del _dir['files'][f]
            return 0
        except AzureMissingResourceHttpError:
            raise FuseOSError(ENOENT)
        except Exception:
            raise FuseOSError(EAGAIN)

    def readdir(self, path, fh):
        if path == '/':
            return ['.', '..'] + [x[1:] for x in self.containers.keys()
                                  if x != '/']

        dir = self._get_dir(path, True)
        if not dir:
            raise FuseOSError(ENOENT)
        return ['.', '..'] + dir['files'].keys()

    def read(self, path, size, offset, fh):
        if not fh or fh not in self.fds:
            raise FuseOSError(ENOENT)

        f_name = path[path.find('/', 1) + 1:]
        c_name = path[1:path.find('/', 1)]

        try:
            data = self.blobs.get_blob(c_name, f_name)
            self.fds[fh] = (self.fds[fh][0], data, False)
            return data[offset:offset + size]
        except URLError, e:
            if e.code == 404:
                raise FuseOSError(ENOENT)
            elif e.code == 403:
                raise FuseOSError(EPERM)
            else:
                log.error("Read blob failed HTTP %d" % e.code)
                raise FuseOSError(EAGAIN)
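
Mounting the filesystem above would go through fusepy's FUSE class (a minimal sketch; the account name, key, and mount point are placeholders):

    from fuse import FUSE

    fuse = FUSE(AzureFS('myaccount', 'mykey'), '/mnt/azure', foreground=True)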
Example no. 47
0
import os

import redis
from applicationinsights import TelemetryClient
from azure.storage import BlobService, QueueService, TableService  # legacy azure SDK
from flask import Flask, render_template

app = Flask(__name__)

# Credentials come from the environment. The STOR_ACC_* and REDIS_PASS variable
# names are assumptions, following the pattern of REDIS_SERVER and INSTR_KEY.
service_keys = {
    'stor_acc_name': os.environ['STOR_ACC_NAME'],
    'stor_acc_key': os.environ['STOR_ACC_KEY'],
    'redis_pass': os.environ['REDIS_PASS'],
    'redis_server': os.environ['REDIS_SERVER'],
    'instr_key': os.environ['INSTR_KEY']
    }

stor_acc_name = service_keys['stor_acc_name']
stor_acc_key = service_keys['stor_acc_key']
redis_pass = service_keys['redis_pass']
redis_server = service_keys['redis_server']
instr_key = service_keys['instr_key']


# storage
account_name = stor_acc_name
account_key = stor_acc_key
blob_service = BlobService(account_name, account_key)
blob_service.create_container('images')
queue_service = QueueService(account_name, account_key)
queue_service.create_queue('taskqueue')
table_service = TableService(account_name, account_key)
table_service.create_table('tasktable')


r = redis.StrictRedis(host=redis_server, port=6380, db=0, password=redis_pass, ssl=True)

tc = TelemetryClient(instr_key)

@app.route('/')
@app.route('/home')
def form():
    return render_template('form_submit.html')
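
A hypothetical companion route (a sketch only: the '/submit' endpoint, the 'image' form field, and the worker protocol are assumptions, not part of the original app) could store an upload in the 'images' container and enqueue its name for a background worker:

    from flask import request

    @app.route('/submit', methods=['POST'])
    def submit():
        f = request.files['image']          # assumed form field name
        blob_service.put_block_blob_from_bytes('images', f.filename, f.read())
        queue_service.put_message('taskqueue', f.filename)
        tc.track_event('image_submitted')   # Application Insights custom event
        tc.flush()
        return render_template('form_submit.html')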