class RgbAzureRepository:
    """Performs storage/retrieval functions for RGB sequences"""

    def __init__(self, azure_account_name, azure_account_key):
        self.table_service = TableService(account_name=azure_account_name,
                                          account_key=azure_account_key)

    def create_rgb_sequence_table(self):
        self.table_service.create_table('rgbsequences')

    #def add_color_to_sequence(self, sequence_name, red, green, blue):
    #    color = Entity()
    #    color.PartitionKey = sequence_name
    #    color.RowKey = '1'
    #    color.Red = red
    #    color.Green = green
    #    color.Blue = blue
    #
    #    self.table_service.insert_entity('rgbsequences', color)

    def get_sequence(self, sequence_name):
        colors = self.table_service.query_entities(
            'rgbsequences', "PartitionKey eq '%s'" % sequence_name)
        sequence = ColorSequence(sequence_name)
        for color in colors:
            # Property names must match the casing used when the entities
            # were written (Red/Green/Blue above).
            rgb = RgbColor(color.Red, color.Green, color.Blue)
            sequence.add_color(rgb)
        return sequence
def main():
    blob_container = request.json["container"]
    blob_id = request.json["id"]

    # Load the .json from the blob service
    blob_service = BlockBlobService(account_name=_storage_account, account_key=_storage_key)
    blob = blob_service.get_blob_to_text(blob_container, blob_id)

    # verbatims is a list of strings
    verbatims = json.loads(blob.content)

    # Generate a UUID for this job; since it's going to be a long-running task,
    # we return the id to the caller and track job status in the table 'ldajobs'.
    jobid = str(uuid.uuid4())

    # Create the table row for this job; the initial status is 'started'
    table_service = TableService(account_name=_storage_account, account_key=_storage_key)
    table_service.create_table("ldajobs")
    task = {'PartitionKey': 'lda_jobs', 'RowKey': jobid, 'status': 'started'}
    table_service.insert_entity('ldajobs', task)

    # Actually start the job
    threading.Thread(target=lda, args=(jobid, verbatims,)).start()

    # ... and immediately return the jobid to the caller
    return Response("%s verbatims now processing" % len(verbatims),
                    status=200, mimetype='text/plain')
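# A minimal companion sketch (assumed, not from the original source): since main()
# only returns a row count, a caller would poll job state via a hypothetical status
# endpoint that reads the 'ldajobs' row back by its jobid.
def status(jobid):
    table_service = TableService(account_name=_storage_account, account_key=_storage_key)
    try:
        task = table_service.get_entity('ldajobs', 'lda_jobs', jobid)
        return Response(task.status, status=200, mimetype='text/plain')
    except AzureMissingResourceHttpError:
        return Response("unknown job", status=404, mimetype='text/plain')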
def init_table():
    table_service = TableService(account_name=os.environ['STORAGE_ACCOUNT_NAME'],
                                 account_key=os.environ['STORAGE_ACCOUNT_KEY'])
    table_name = os.environ['TABLE_NAME']
    table_service.create_table(table_name)
    pk = socket.gethostname()
    rkroot = str(uuid.uuid4())
    return {'service': table_service, 'name': table_name, 'pk': pk, 'rk': rkroot}
class SummaryTable:
    def __init__(self, account_name, account_key, table_name="summary"):
        """Initialize a table to store summary data. Values must be provided
        for 'account_name' and 'account_key', which are associated with the
        Azure Storage account. 'table_name' is optional and is the name of the
        table used (and created if necessary) in the storage account.
        """
        self.log = Log()
        self.account_name = account_name
        self.account_key = account_key
        self.table_name = table_name
        self.createAzureTable()

    def createAzureTable(self):
        """Create an Azure Table in which to store the summary results."""
        self.table_service = TableService(self.account_name, self.account_key)
        self.table_service.create_table(self.table_name)

    def deleteTable(self, name):
        """Delete a table in which summary results have been stored."""
        self.table_service.delete_table(name, False)

    def writeCount(self, count_type, count):
        entry = {'PartitionKey': 'count', 'RowKey': count_type, 'total_count': count}
        self.table_service.insert_entity(self.table_name, entry)

    def updateCount(self, count_type, count):
        entry = {'total_count': count}
        self.table_service.update_entity(self.table_name, 'count', count_type, entry)

    def getCount(self, event_type):
        """Get the total number of events of a given type."""
        count = 0
        entries = self.table_service.query_entities(
            self.table_name,
            "PartitionKey eq 'count' and RowKey eq '" + event_type + "'")
        if len(entries) == 0:
            self.writeCount(event_type, 0)
        elif len(entries) > 1:
            raise Exception('We have more than one summary entry for ' + event_type)
        else:
            count = entries[0].total_count
        return count
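# A short usage sketch (the account values are placeholders, not from the original
# source): getCount() lazily seeds a zero row the first time an event type is seen,
# so a read-modify-write counter loop looks like this.
summary = SummaryTable('myaccount', 'mykey')
current = summary.getCount('errors')       # creates the row with 0 on first use
summary.updateCount('errors', current + 1)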
def prepare_storage_account(storage_account_name, storage_access_key,
                            endpoint_suffix, protocol="https"):
    blob_service = AppendBlobService(account_name=storage_account_name,
                                     account_key=storage_access_key,
                                     endpoint_suffix=endpoint_suffix,
                                     protocol=protocol)
    blob_service.create_container('bosh')
    blob_service.create_container(
        container_name='stemcell',
        public_access='blob'
    )

    # Prepare the table for storing metadata about the storage account and stemcells
    table_service = TableService(account_name=storage_account_name,
                                 account_key=storage_access_key,
                                 endpoint_suffix=endpoint_suffix,
                                 protocol=protocol)
    table_service.create_table('stemcells')
def prepare(self, area):
    assert area is not None, 'area is none; should already be validated'
    area_config = config.load_area(area)
    tracking_config = config.load_tracking(area_config['tracking'])
    table_service = TableService(account_name=tracking_config['name'],
                                 account_key=tracking_config['key1'])
    tracking_table = self._get_table(area)
    table_service.create_table(tracking_table)
class MonitorManager(object):
    def __init__(self):
        # First create an Azure Storage account and obtain your account name and key
        self._tableService = TableService(account_name='YOUR_ACCOUNT_NAME',
                                          account_key='YOUR_ACCOUNT_KEY')
        self._tableService.create_table('sensordata')

    def Insert(self, distance, currentTime):
        distanceData = Entity()
        distanceData.PartitionKey = 'sensorKey'
        distanceData.RowKey = str(uuid.uuid1())
        distanceData.distance = str(distance)
        distanceData.time = str(currentTime)
        self._tableService.insert_entity('sensordata', distanceData)
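# A hedged read-back sketch (assumed, not part of the original class): filter on
# the fixed partition key and sort client-side, since uuid1 row keys are not
# chronologically ordered.
import datetime

manager = MonitorManager()
manager.Insert(42.0, datetime.datetime.now())
rows = manager._tableService.query_entities('sensordata', "PartitionKey eq 'sensorKey'")
for row in sorted(rows, key=lambda r: r.time):
    print(row.time, row.distance)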
def prepare_storage(settings):
    default_storage_account_name = settings["DEFAULT_STORAGE_ACCOUNT_NAME"]
    storage_access_key = settings["STORAGE_ACCESS_KEY"]

    blob_service = AppendBlobService(default_storage_account_name, storage_access_key)
    blob_service.create_container('bosh')
    blob_service.create_container(
        container_name='stemcell',
        public_access='blob'
    )

    # Prepare the table for storing metadata about the storage account and stemcells
    table_service = TableService(default_storage_account_name, storage_access_key)
    table_service.create_table('stemcells')
def prepare_storage(settings):
    default_storage_account_name = settings["DEFAULT_STORAGE_ACCOUNT_NAME"]
    storage_access_key = settings["STORAGE_ACCESS_KEY"]
    endpoint_suffix = settings["SERVICE_HOST_BASE"]

    blob_service = AppendBlobService(account_name=default_storage_account_name,
                                     account_key=storage_access_key,
                                     endpoint_suffix=endpoint_suffix)
    blob_service.create_container('bosh')
    blob_service.create_container(
        container_name='stemcell',
        public_access='blob'
    )

    # Prepare the table for storing metadata about the storage account and stemcells
    table_service = TableService(account_name=default_storage_account_name,
                                 account_key=storage_access_key,
                                 endpoint_suffix=endpoint_suffix)
    table_service.create_table('stemcells')

    # Same preparation for the secondary default storage account
    default_storage_account_name_secondary = settings["DEFAULT_STORAGE_ACCOUNT_NAME_SECONDARY"]
    default_storage_access_key_secondary = settings["DEFAULT_STORAGE_ACCESS_KEY_SECONDARY"]

    blob_service = AppendBlobService(account_name=default_storage_account_name_secondary,
                                     account_key=default_storage_access_key_secondary,
                                     endpoint_suffix=endpoint_suffix)
    blob_service.create_container('bosh')
    blob_service.create_container(
        container_name='stemcell',
        public_access='blob'
    )

    table_service = TableService(account_name=default_storage_account_name_secondary,
                                 account_key=default_storage_access_key_secondary,
                                 endpoint_suffix=endpoint_suffix)
    table_service.create_table('stemcells')

    # Prepare the primary premium storage account (containers only, no table)
    storage_account_name_primary = settings["STORAGE_ACCOUNT_NAME_PRIMARY"]
    storage_access_key_primary = settings["STORAGE_ACCESS_KEY_PRIMARY"]

    blob_service = AppendBlobService(account_name=storage_account_name_primary,
                                     account_key=storage_access_key_primary,
                                     endpoint_suffix=endpoint_suffix)
    blob_service.create_container('bosh')
    blob_service.create_container('stemcell')

    # Prepare the secondary premium storage account
    storage_account_name_secondary = settings["STORAGE_ACCOUNT_NAME_SECONDARY"]
    storage_access_key_secondary = settings["STORAGE_ACCESS_KEY_SECONDARY"]

    blob_service = AppendBlobService(account_name=storage_account_name_secondary,
                                     account_key=storage_access_key_secondary,
                                     endpoint_suffix=endpoint_suffix)
    blob_service.create_container('bosh')
    blob_service.create_container('stemcell')
def connection(tasks, query, types):
    table_service = TableService(account_name=acc_name, account_key=acc_key)
    table_service.create_table('customer')
    if types == "insert":
        print(tasks)
        table_service.insert_entity('customer', tasks)
        return "Added successfully"
    elif types == "retrieve":
        tasks = table_service.query_entities('customer', filter=query)
        df = pd.DataFrame(df_con(tasks))
        data1 = df.to_json(orient='records')
        return data1
def load_from_csv(relative_path: str, storage_account: TableService):
    filenames = os.listdir(relative_path)
    for filename in filenames:
        if filename.endswith(".csv"):
            table_name = filename[:-4]
            storage_account.create_table(table_name)
            df = pd.read_csv(os.path.join(relative_path, filename))
            for _, row in df.iterrows():
                d = pd.Series.to_dict(row)
                entity = {}
                for k, v in d.items():
                    # Skip the service-managed Timestamp and OData type annotations
                    if k == 'Timestamp' or k.endswith('@type'):
                        continue
                    entity[k] = v
                storage_account.insert_entity(table_name, entity)
class Repository(object):
    """Azure Table storage repository for UOTD."""

    def __init__(self, settings):
        """Initialise UOTD repository with the given settings dict.

        Required settings:
        STORAGE_NAME -- the Azure Storage account name
        STORAGE_KEY -- an access key for the Storage account
        STORAGE_TABLE_UOTD -- the name of the table
        """
        self.service = TableService(settings["STORAGE_NAME"], settings["STORAGE_KEY"])
        self.uotd_table = settings["STORAGE_TABLE_UOTD"]
        self.service.create_table(self.uotd_table)
        self.partition_key_format = "%Y%m"
        self.row_key_format = "%d"

    def get_uotd(self):
        """Get the UUID for the current day.

        If the UUID does not yet exist then it will be created.
        """
        partition_key = date.today().strftime(self.partition_key_format)
        row_key = date.today().strftime(self.row_key_format)
        try:
            uotd_entity = self.service.get_entity(self.uotd_table, partition_key, row_key)
            uuid = uotd_entity.uuid
        except AzureMissingResourceHttpError:
            uuid = str(uuid4())
            uotd_entity = {
                "PartitionKey": partition_key,
                "RowKey": row_key,
                "uuid": uuid
            }
            self.service.insert_entity(self.uotd_table, uotd_entity)
        return uuid

    def get_uuids(self, partition_key):
        """Get all the UUIDs in a given partition."""
        filter_string = "PartitionKey eq '{0}'".format(partition_key)
        entities = self.service.query_entities(self.uotd_table, filter=filter_string)
        return entities
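# A brief usage sketch (the settings values are placeholders, not from the original
# source): partition keys group days by month, so querying "201701" would list every
# UUID minted in January 2017.
repo = Repository({
    "STORAGE_NAME": "myaccount",
    "STORAGE_KEY": "mykey",
    "STORAGE_TABLE_UOTD": "uotd",
})
print(repo.get_uotd())          # same UUID for the rest of the day
for entity in repo.get_uuids(date.today().strftime("%Y%m")):
    print(entity.RowKey, entity.uuid)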
def post_table_log(json_str):
    from azure.storage.table import TableService, Entity
    table_service = TableService(account_name=pkey.azure_storage_id,
                                 account_key=pkey.azure_storage_key)
    table_service.create_table("facebooklog")

    def get_table_timestamp_key():
        import time
        # Reverse timestamp (seconds remaining until 2070-01-01), so newer rows
        # sort first: Azure Table rows are returned in ascending RowKey order.
        current_time = time.gmtime()
        start = time.mktime(current_time)
        last = time.mktime(time.struct_time((2070, 1, 1, 0, 0, 0, 3, 100, -1)))
        return str(int(last - start))

    task = Entity()
    task.PartitionKey = 'feedlog'
    task.RowKey = get_table_timestamp_key()
    task.json = json_str
    table_service.insert_entity('facebooklog', task)
def SendAzure():
    table_service = TableService(account_name='[NAMEHERE]', account_key='[KEYHERE]')
    table_name = 'tempData'
    partition_key = 'central'
    table_service.create_table(table_name, fail_on_exist=False)
    date = datetime.datetime.now()
    iso_date = date.isoformat()
    tempRecord = ReadTemp()
    result = ""
    if tempRecord < 70:
        entry = {'PartitionKey': partition_key, 'RowKey': iso_date,
                 'Temperature': tempRecord}
        table_service.insert_entity(table_name, entry)
        result = "SENT " + str(tempRecord)
    else:
        result = "ERROR " + str(tempRecord)
    return result
def prepare_storage(settings):
    default_storage_account_name = settings["DEFAULT_STORAGE_ACCOUNT_NAME"]
    storage_access_key = settings["DEFAULT_STORAGE_ACCESS_KEY"]
    endpoint_suffix = settings["SERVICE_HOST_BASE"]
    protocol = "https"
    if settings["ENVIRONMENT"] == "AzureStack":
        protocol = "http"

    blob_service = AppendBlobService(account_name=default_storage_account_name,
                                     account_key=storage_access_key,
                                     endpoint_suffix=endpoint_suffix,
                                     protocol=protocol)
    blob_service.create_container('bosh')
    blob_service.create_container(
        container_name='stemcell',
        public_access='blob'
    )

    # Prepare the table for storing metadata about the storage account and stemcells
    table_service = TableService(account_name=default_storage_account_name,
                                 account_key=storage_access_key,
                                 endpoint_suffix=endpoint_suffix,
                                 protocol=protocol)
    table_service.create_table('stemcells')
def claim_and_run_device(driver_id):
    iot_hub = IoTHub(IOT_HUB_NAME, IOT_HUB_OWNER_KEY)
    device, device_twin = iot_hub.claim_device(driver_id)
    device_twin_json = json.loads(device_twin)
    device_id = device_twin_json['deviceId']
    iothub_device = IoTHubDevice(IOT_HUB_NAME, device_id, device.primaryKey)

    table_service = TableService(account_name=STORAGE_ACCOUNT_NAME,
                                 account_key=STORAGE_ACCOUNT_KEY)
    table_service.create_table('logs', fail_on_exist=False)

    def report_state(state):
        iothub_device.send_reported_state(state)

    def send_telemetry(data):
        iothub_device.send_message(data)

    def log(message, code, level):
        level_name = logging.getLevelName(level)
        log_entity = {
            'PartitionKey': device_id,
            'RowKey': uuid.uuid4().hex,
            'Level': level_name,
            'Code': code,
            'Message': message,
            '_Driver': driver_id
        }
        print(', '.join([driver_id, device_id, str(level_name), str(code), str(message)]))
        table_service.insert_or_replace_entity('logs', log_entity)
        if level == logging.CRITICAL:
            # A critical log entry disables the device in IoT Hub
            iot_hub.disable_device(device_id)

    device_simulator = SimulatorFactory.create('devices.engines.Engine',
                                               report_state, send_telemetry, log)
    if not device_simulator.initialize(device_twin_json):
        return

    def device_twin_callback(update_state, payload, user_context):
        device_simulator.on_update(str(update_state), json.loads(payload))

    iothub_device.client.set_device_twin_callback(device_twin_callback, 0)
    device_simulator.run()
def _setup_tables(account_name, account_key, table_name, batch_size=100, max_num=1000000):
    table_service = TableService(account_name, account_key)
    table_service.create_table(table_name)

    # Partition numbers by the first three digits of their zero-padded form,
    # e.g. 1 -> '001', 12345 -> '123'.
    partitions = defaultdict(list)
    for num in range(1, max_num + 1):
        partitions[('%03d' % num)[:3]].append(str(num))

    # Entity-group batches may only touch a single partition and at most 100
    # rows, so insert each partition in batch_size chunks.
    for partition, nums in partitions.items():
        for batch_num, batch in enumerate(_grouper(nums, batch_size), start=1):
            table_batch = TableBatch()
            for num in filter(None, batch):
                table_batch.insert_entity({
                    'PartitionKey': partition,
                    'RowKey': num,
                    'value': str(uuid4()),
                })
            table_service.commit_batch(table_name, table_batch)
            print('Done with partition %s, batch %d' % (partition, batch_num))
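# A hedged read-back sketch (assumed; it reuses _setup_tables' parameter names):
# spot-check one partition after the batch load by counting its rows.
table_service = TableService(account_name, account_key)
rows = table_service.query_entities(table_name, filter="PartitionKey eq '001'")
print(sum(1 for _ in rows), "rows in partition 001")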
class AzureStorage(BaseStorage):
    def __init__(self, env):
        super(AzureStorage, self).__init__(env)
        self._ts = TableService(account_name=Config.azure_storage_account_name,
                                account_key=Config.azure_storage_account_key)
        self._benchmark = self._env.benchmark.name

        # Make sure our table exists
        Debug.info << "Creating tableservice for benchmark : " << \
            self.table_name() << "\n"
        self._ts.create_table(self.table_name())

    def table_name(self):
        return self._env.table_name

    def save(self, dic, partition=None, key=''):
        dic['RowKey'] = str(self.reverse_timestamp())
        dic['Cloud'] = str(self._env.cloud_name)
        if key:
            dic['RowKey'] = dic['RowKey'] + '_' + str(key)
        # Don't really need the partition key right now
        if partition is None:
            dic['PartitionKey'] = self._env.benchmark.name
        else:
            dic['PartitionKey'] = partition
        try:
            self._ts.insert_entity(self.table_name(), dic)
        except Exception:
            print("Error saving: %s" % dic, file=sys.stderr)
import os

from azure.storage.table import TableService
from azure.storage.blob import BlockBlobService

STORAGE_ACCOUNT_NAME = os.environ['STORAGE_ACCOUNT_NAME']
STORAGE_ACCOUNT_KEY = os.environ['STORAGE_ACCOUNT_KEY']

table_service = TableService(account_name=STORAGE_ACCOUNT_NAME,
                             account_key=STORAGE_ACCOUNT_KEY)
block_blob_service = BlockBlobService(account_name=STORAGE_ACCOUNT_NAME,
                                      account_key=STORAGE_ACCOUNT_KEY)

block_blob_service.create_container('telemetry')
table_service.create_table('cycles')
table_service.create_table('features')
table_service.create_table('predictions')
table_service.create_table('databricks')
class TableStorage():
    def __init__(self, CONNECTION_STRING):
        """
        Constructor. Expects the connection string of the Azure Storage
        account, found under the Storage resource -> Access Keys.

        Parameters:
        CONNECTION_STRING = The string that includes the AccountName,
        AccountKey and EndpointSuffix
        """
        self.CONNECTION_STRING = CONNECTION_STRING
        # Split the connection string into its parts
        Config = dict(s.split('=', 1) for s in CONNECTION_STRING.split(';') if s)
        # Extract the storage account name and the EndpointSuffix
        self.AccountName = Config.get('AccountName')
        self.EndPointSuffix = Config.get('EndpointSuffix')

    def CreateTableServices(self):
        """
        Initializes a Table Service instance used to talk to the
        storage account in Azure.
        """
        self.TableService = TableService(account_name=self.AccountName,
                                         connection_string=self.CONNECTION_STRING,
                                         endpoint_suffix=self.EndPointSuffix)

    def createTable(self, TableName):
        """
        Creates the table if it does not already exist; otherwise reports
        that it exists.

        Parameters:
        TableName = Name of the table to create
        """
        print('\nCreate a table with name - ' + TableName)
        if not self.TableService.exists(TableName):
            self.TableService.create_table(TableName)
            print("Table created successfully!")
        else:
            print('Error creating table, ' + TableName + ' check if it already exists')

    def insertEntity(self, TableName, Entity):
        """
        Inserts an entity into the specified table.

        Parameters:
        TableName = Name of the target table
        Entity = The object with the entity to add
        """
        print('\nInserting a new entity into table - ' + TableName)
        self.TableService.insert_or_merge_entity(TableName, Entity)
        print('Successfully inserted the new entity')

    def getEntity(self, TableName, PartitionKey, RowKey):
        """
        Fetches the full entity by its PartitionKey and RowKey. Returns an
        object directly; no json.loads() needed.

        Parameters:
        TableName = Name of the table to read from
        PartitionKey = String with the partition key of the wanted entity
        RowKey = String with the row key of the wanted entity
        """
        print('\nGetting entity.')
        Entity = self.TableService.get_entity(TableName, PartitionKey, RowKey)
        return Entity

    def updateEntity(self, TableName, NewEntity):
        """
        Takes the object with the updated values and updates it in
        table storage.

        Parameters:
        TableName = Name of the table to update
        NewEntity = The object with the entity to update
        """
        print('\nUpdating entity. PK: ' + NewEntity.PartitionKey +
              ' RK: ' + NewEntity.RowKey)
        self.TableService.update_entity(TableName, NewEntity)

    def deleteEntity(self, TableName, PartitionKey, RowKey):
        """
        Deletes the entity matching the PartitionKey and RowKey.

        Parameters:
        TableName = Name of the table to delete from
        PartitionKey = String with the partition key of the entity
        RowKey = String with the row key of the entity
        """
        print('\nDeleting entity')
        self.TableService.delete_entity(TableName, PartitionKey, RowKey)

    def deleteTable(self, TableName):
        """
        Deletes the table if it exists; otherwise reports that it
        does not exist.

        Parameters:
        TableName = Name of the table to delete
        """
        print('\nDeleting the table.')
        if self.TableService.exists(TableName):
            self.TableService.delete_table(TableName)
            print('Successfully deleted the table')
        else:
            print('The table does not exist')
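# A short usage sketch (the connection string is a placeholder): note the two-step
# initialization, the constructor only parses the string and CreateTableServices()
# must be called before any table operation.
storage = TableStorage('DefaultEndpointsProtocol=https;AccountName=myaccount;'
                       'AccountKey=mykey;EndpointSuffix=core.windows.net')
storage.CreateTableServices()
storage.createTable('demo')
storage.insertEntity('demo', {'PartitionKey': 'p1', 'RowKey': 'r1', 'value': 42})
print(storage.getEntity('demo', 'p1', 'r1').value)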
class StorageContext():
    """Initializes the repository with the specified settings dict.

    Required settings in config dict are:
    - AZURE_STORAGE_NAME
    - AZURE_STORAGE_KEY
    """
    tables = []
    queues = []
    tableservice = None
    queueservice = None
    storage_key = ''
    storage_name = ''

    def __init__(self, **kwargs):
        self.storage_name = kwargs.get('AZURE_STORAGE_NAME', '')
        self.storage_key = kwargs.get('AZURE_STORAGE_KEY', '')

        # service init
        self.tables = []
        if self.storage_key != '' and self.storage_name != '':
            self.tableservice = TableService(self.storage_name, self.storage_key)
            self.queueservice = QueueService(self.storage_name, self.storage_key)
        else:
            self.tableservice = None
            self.queueservice = None

    def create_table(self, tablename) -> bool:
        if (tablename not in self.tables) and (self.tableservice is not None):
            try:
                self.tableservice.create_table(tablename)
                self.tables.append(tablename)
                return True
            except AzureException as e:
                log.error('failed to create {} with error {}'.format(tablename, e))
                return False
        else:
            return True

    def table_isempty(self, tablename, PartitionKey='', RowKey='') -> bool:
        if (tablename in self.tables) and (self.tableservice is not None):
            filter = "PartitionKey eq '{}'".format(PartitionKey) if PartitionKey != '' else ''
            if filter == '':
                filter = "RowKey eq '{}'".format(RowKey) if RowKey != '' else ''
            else:
                filter = filter + (" and RowKey eq '{}'".format(RowKey) if RowKey != '' else '')
            try:
                entities = list(self.tableservice.query_entities(tablename,
                                                                 filter=filter,
                                                                 select='PartitionKey',
                                                                 num_results=1))
                if len(entities) == 1:
                    return False
                else:
                    return True
            except AzureMissingResourceHttpError as e:
                log.debug('failed to query {} with error {}'.format(tablename, e))
                return True
        else:
            return True

    def create_queue(self, queuename) -> bool:
        if (queuename not in self.queues) and (self.queueservice is not None):
            try:
                self.queueservice.create_queue(queuename)
                self.queues.append(queuename)
                return True
            except AzureException as e:
                log.error('failed to create {} with error {}'.format(queuename, e))
                return False
        else:
            return True

    def register_model(self, storagemodel):
        if isinstance(storagemodel, StorageTableEntity):
            self.create_table(storagemodel._tablename)
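# A minimal usage sketch (account values are placeholders): create_table() is
# memoized through self.tables, so repeated calls for the same table are cheap.
ctx = StorageContext(AZURE_STORAGE_NAME='myaccount', AZURE_STORAGE_KEY='mykey')
if ctx.create_table('events'):
    print('events empty?', ctx.table_isempty('events'))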
class AzureJobStore(AbstractJobStore):
    """
    A job store that uses Azure's blob store for file storage and Table Service to store job info
    with strong consistency.
    """

    # Dots in container names should be avoided because container names are used in HTTPS bucket
    # URLs where they may interfere with the certificate common name. We use a double underscore
    # as a separator instead.
    #
    containerNameRe = re.compile(r'^[a-z0-9](-?[a-z0-9]+)+[a-z0-9]$')

    # See https://msdn.microsoft.com/en-us/library/azure/dd135715.aspx
    #
    minContainerNameLen = 3
    maxContainerNameLen = 63
    maxNameLen = 10
    nameSeparator = 'xx'  # Table names must be alphanumeric

    # Length of a jobID - used to test if a stats file has been read already or not
    jobIDLength = len(str(uuid.uuid4()))

    def __init__(self, locator, jobChunkSize=maxAzureTablePropertySize):
        super(AzureJobStore, self).__init__()
        accountName, namePrefix = locator.split(':', 1)
        if '--' in namePrefix:
            raise ValueError("Invalid name prefix '%s'. Name prefixes may not contain %s."
                             % (namePrefix, self.nameSeparator))
        if not self.containerNameRe.match(namePrefix):
            raise ValueError("Invalid name prefix '%s'. Name prefixes must contain only digits, "
                             "hyphens or lower-case letters and must not start or end in a "
                             "hyphen." % namePrefix)
        # Reserve 13 for separator and suffix
        if len(namePrefix) > self.maxContainerNameLen - self.maxNameLen - len(self.nameSeparator):
            raise ValueError(("Invalid name prefix '%s'. Name prefixes may not be longer than 50 "
                              "characters." % namePrefix))
        self.locator = locator
        self.jobChunkSize = jobChunkSize
        self.accountKey = _fetchAzureAccountKey(accountName)
        self.accountName = accountName
        # Table names have strict requirements in Azure
        self.namePrefix = self._sanitizeTableName(namePrefix)
        # These are the main API entry points.
        self.tableService = TableService(account_key=self.accountKey, account_name=accountName)
        self.blobService = BlobService(account_key=self.accountKey, account_name=accountName)
        # Serialized jobs table
        self.jobItems = None
        # Job<->file mapping table
        self.jobFileIDs = None
        # Container for all shared and unshared files
        self.files = None
        # Stats and logging strings
        self.statsFiles = None
        # File IDs that contain stats and logging strings
        self.statsFileIDs = None

    @property
    def keyPath(self):
        return self.config.cseKey

    def initialize(self, config):
        if self._jobStoreExists():
            raise JobStoreExistsException(self.locator)
        logger.debug("Creating job store at '%s'" % self.locator)
        self._bind(create=True)
        super(AzureJobStore, self).initialize(config)

    def resume(self):
        if not self._jobStoreExists():
            raise NoSuchJobStoreException(self.locator)
        logger.debug("Using existing job store at '%s'" % self.locator)
        self._bind(create=False)
        super(AzureJobStore, self).resume()

    def destroy(self):
        self._bind()
        for name in 'jobItems', 'jobFileIDs', 'files', 'statsFiles', 'statsFileIDs':
            resource = getattr(self, name)
            if resource is not None:
                if isinstance(resource, AzureTable):
                    resource.delete_table()
                elif isinstance(resource, AzureBlobContainer):
                    resource.delete_container()
                else:
                    assert False
                setattr(self, name, None)

    def _jobStoreExists(self):
        """
        Checks if the job store exists by querying the existence of the statsFileIDs table.
        Note that this is the last component that is deleted in :meth:`.destroy`.
        """
        for attempt in retry_azure():
            with attempt:
                try:
                    table = self.tableService.query_tables(table_name=self._qualify('statsFileIDs'))
                except AzureMissingResourceHttpError as e:
                    if e.status_code == 404:
                        return False
                    else:
                        raise
                else:
                    return table is not None

    def _bind(self, create=False):
        table = self._bindTable
        container = self._bindContainer
        for name, binder in (('jobItems', table),
                             ('jobFileIDs', table),
                             ('files', container),
                             ('statsFiles', container),
                             ('statsFileIDs', table)):
            if getattr(self, name) is None:
                setattr(self, name, binder(self._qualify(name), create=create))

    def _qualify(self, name):
        return self.namePrefix + self.nameSeparator + name.lower()

    def jobs(self):
        # How many jobs have we done?
        total_processed = 0
        for jobEntity in self.jobItems.query_entities_auto():
            # Process the items in the page
            yield AzureJob.fromEntity(jobEntity)
            total_processed += 1
            if total_processed % 1000 == 0:
                # Produce some feedback for the user, because this can take
                # a long time on, for example, Azure
                logger.debug("Processed %d total jobs" % total_processed)
        logger.debug("Processed %d total jobs" % total_processed)

    def create(self, jobNode):
        jobStoreID = self._newJobID()
        job = AzureJob.fromJobNode(jobNode, jobStoreID, self._defaultTryCount())
        entity = job.toItem(chunkSize=self.jobChunkSize)
        entity['RowKey'] = EntityProperty('Edm.String', jobStoreID)
        self.jobItems.insert_entity(entity=entity)
        return job

    def exists(self, jobStoreID):
        if self.jobItems.get_entity(row_key=bytes(jobStoreID)) is None:
            return False
        return True

    def load(self, jobStoreID):
        jobEntity = self.jobItems.get_entity(row_key=bytes(jobStoreID))
        if jobEntity is None:
            raise NoSuchJobException(jobStoreID)
        return AzureJob.fromEntity(jobEntity)

    def update(self, job):
        self.jobItems.update_entity(row_key=bytes(job.jobStoreID),
                                    entity=job.toItem(chunkSize=self.jobChunkSize))

    def delete(self, jobStoreID):
        try:
            self.jobItems.delete_entity(row_key=bytes(jobStoreID))
        except AzureMissingResourceHttpError:
            # Job deletion is idempotent, and this job has been deleted already
            return
        filterString = "PartitionKey eq '%s'" % jobStoreID
        for fileEntity in self.jobFileIDs.query_entities(filter=filterString):
            jobStoreFileID = fileEntity.RowKey
            self.deleteFile(jobStoreFileID)

    def getEnv(self):
        return dict(AZURE_ACCOUNT_KEY=self.accountKey)

    class BlobInfo(namedtuple('BlobInfo', ('account', 'container', 'name'))):
        @property
        @memoize
        def service(self):
            return BlobService(account_name=self.account,
                               account_key=_fetchAzureAccountKey(self.account))

    @classmethod
    def getSize(cls, url):
        blob = cls._parseWasbUrl(url)
        blobProps = blob.service.get_blob_properties(blob.container, blob.name)
        return int(blobProps['content-length'])

    @classmethod
    def _readFromUrl(cls, url, writable):
        blob = cls._parseWasbUrl(url)
        for attempt in retry_azure():
            with attempt:
                blob.service.get_blob_to_file(container_name=blob.container,
                                              blob_name=blob.name,
                                              stream=writable)

    @classmethod
    def _writeToUrl(cls, readable, url):
        blob = cls._parseWasbUrl(url)
        blob.service.put_block_blob_from_file(container_name=blob.container,
                                              blob_name=blob.name,
                                              stream=readable)

    @classmethod
    def _parseWasbUrl(cls, url):
        """
        :param urlparse.ParseResult url: x
        :rtype: AzureJobStore.BlobInfo
        """
        assert url.scheme in ('wasb', 'wasbs')
        try:
            container, account = url.netloc.split('@')
        except ValueError:
            raise InvalidImportExportUrlException(url)
        suffix = '.blob.core.windows.net'
        if account.endswith(suffix):
            account = account[:-len(suffix)]
        else:
            raise InvalidImportExportUrlException(url)
        assert url.path[0] == '/'
        return cls.BlobInfo(account=account, container=container, name=url.path[1:])

    @classmethod
    def _supportsUrl(cls, url, export=False):
        return url.scheme.lower() in ('wasb', 'wasbs')

    def writeFile(self, localFilePath, jobStoreID=None):
        jobStoreFileID = self._newFileID()
        self.updateFile(jobStoreFileID, localFilePath)
        self._associateFileWithJob(jobStoreFileID, jobStoreID)
        return jobStoreFileID

    def updateFile(self, jobStoreFileID, localFilePath):
        with open(localFilePath) as read_fd:
            with self._uploadStream(jobStoreFileID, self.files) as write_fd:
                while True:
                    buf = read_fd.read(self._maxAzureBlockBytes)
                    write_fd.write(buf)
                    if len(buf) == 0:
                        break

    def readFile(self, jobStoreFileID, localFilePath):
        try:
            with self._downloadStream(jobStoreFileID, self.files) as read_fd:
                with open(localFilePath, 'w') as write_fd:
                    while True:
                        buf = read_fd.read(self._maxAzureBlockBytes)
                        write_fd.write(buf)
                        if not buf:
                            break
        except AzureMissingResourceHttpError:
            raise NoSuchFileException(jobStoreFileID)

    def deleteFile(self, jobStoreFileID):
        try:
            self.files.delete_blob(blob_name=bytes(jobStoreFileID))
            self._dissociateFileFromJob(jobStoreFileID)
        except AzureMissingResourceHttpError:
            pass

    def fileExists(self, jobStoreFileID):
        # As Azure doesn't have a blob_exists method (at least in the
        # python API) we just try to download the metadata, and hope
        # the metadata is small so the call will be fast.
        try:
            self.files.get_blob_metadata(blob_name=bytes(jobStoreFileID))
            return True
        except AzureMissingResourceHttpError:
            return False

    @contextmanager
    def writeFileStream(self, jobStoreID=None):
        # TODO: this (and all stream methods) should probably use the
        # Append Blob type, but that is not currently supported by the
        # Azure Python API.
        jobStoreFileID = self._newFileID()
        with self._uploadStream(jobStoreFileID, self.files) as fd:
            yield fd, jobStoreFileID
        self._associateFileWithJob(jobStoreFileID, jobStoreID)

    @contextmanager
    def updateFileStream(self, jobStoreFileID):
        with self._uploadStream(jobStoreFileID, self.files, checkForModification=True) as fd:
            yield fd

    def getEmptyFileStoreID(self, jobStoreID=None):
        jobStoreFileID = self._newFileID()
        with self._uploadStream(jobStoreFileID, self.files) as _:
            pass
        self._associateFileWithJob(jobStoreFileID, jobStoreID)
        return jobStoreFileID

    @contextmanager
    def readFileStream(self, jobStoreFileID):
        if not self.fileExists(jobStoreFileID):
            raise NoSuchFileException(jobStoreFileID)
        with self._downloadStream(jobStoreFileID, self.files) as fd:
            yield fd

    @contextmanager
    def writeSharedFileStream(self, sharedFileName, isProtected=None):
        assert self._validateSharedFileName(sharedFileName)
        sharedFileID = self._newFileID(sharedFileName)
        with self._uploadStream(sharedFileID, self.files, encrypted=isProtected) as fd:
            yield fd

    @contextmanager
    def readSharedFileStream(self, sharedFileName):
        assert self._validateSharedFileName(sharedFileName)
        sharedFileID = self._newFileID(sharedFileName)
        if not self.fileExists(sharedFileID):
            raise NoSuchFileException(sharedFileID)
        with self._downloadStream(sharedFileID, self.files) as fd:
            yield fd

    def writeStatsAndLogging(self, statsAndLoggingString):
        # TODO: would be a great use case for the append blobs, once implemented in the Azure SDK
        jobStoreFileID = self._newFileID()
        encrypted = self.keyPath is not None
        if encrypted:
            statsAndLoggingString = encryption.encrypt(statsAndLoggingString, self.keyPath)
        self.statsFiles.put_block_blob_from_text(blob_name=bytes(jobStoreFileID),
                                                 text=statsAndLoggingString,
                                                 x_ms_meta_name_values=dict(
                                                     encrypted=str(encrypted)))
        self.statsFileIDs.insert_entity(entity={'RowKey': jobStoreFileID})

    def readStatsAndLogging(self, callback, readAll=False):
        suffix = '_old'
        numStatsFiles = 0
        for attempt in retry_azure():
            with attempt:
                for entity in self.statsFileIDs.query_entities():
                    jobStoreFileID = entity.RowKey
                    hasBeenRead = len(jobStoreFileID) > self.jobIDLength
                    if not hasBeenRead:
                        with self._downloadStream(jobStoreFileID, self.statsFiles) as fd:
                            callback(fd)
                        # Mark this entity as read by appending the suffix
                        self.statsFileIDs.insert_entity(entity={'RowKey': jobStoreFileID + suffix})
                        self.statsFileIDs.delete_entity(row_key=bytes(jobStoreFileID))
                        numStatsFiles += 1
                    elif readAll:
                        # Strip the suffix to get the original ID
                        jobStoreFileID = jobStoreFileID[:-len(suffix)]
                        with self._downloadStream(jobStoreFileID, self.statsFiles) as fd:
                            callback(fd)
                        numStatsFiles += 1
        return numStatsFiles

    _azureTimeFormat = "%Y-%m-%dT%H:%M:%SZ"

    def getPublicUrl(self, jobStoreFileID):
        try:
            self.files.get_blob_properties(blob_name=bytes(jobStoreFileID))
        except AzureMissingResourceHttpError:
            raise NoSuchFileException(jobStoreFileID)
        # Compensate for a little bit of clock skew
        startTimeStr = (datetime.utcnow() - timedelta(minutes=5)).strftime(self._azureTimeFormat)
        endTime = datetime.utcnow() + self.publicUrlExpiration
        endTimeStr = endTime.strftime(self._azureTimeFormat)
        sap = SharedAccessPolicy(AccessPolicy(startTimeStr, endTimeStr,
                                              BlobSharedAccessPermissions.READ))
        sas_token = self.files.generate_shared_access_signature(blob_name=bytes(jobStoreFileID),
                                                                shared_access_policy=sap)
        return self.files.make_blob_url(blob_name=bytes(jobStoreFileID)) + '?' + sas_token

    def getSharedPublicUrl(self, sharedFileName):
        jobStoreFileID = self._newFileID(sharedFileName)
        return self.getPublicUrl(jobStoreFileID)

    def _newJobID(self):
        # raw UUIDs don't work for Azure property names because the '-' character is disallowed.
        return str(uuid.uuid4()).replace('-', '_')

    # A dummy job ID under which all shared files are stored.
    sharedFileJobID = uuid.UUID('891f7db6-e4d9-4221-a58e-ab6cc4395f94')

    def _newFileID(self, sharedFileName=None):
        if sharedFileName is None:
            ret = bytes(uuid.uuid4())
        else:
            ret = bytes(uuid.uuid5(self.sharedFileJobID, bytes(sharedFileName)))
        return ret.replace('-', '_')

    def _associateFileWithJob(self, jobStoreFileID, jobStoreID=None):
        if jobStoreID is not None:
            self.jobFileIDs.insert_entity(entity={'PartitionKey': EntityProperty('Edm.String', jobStoreID),
                                                  'RowKey': EntityProperty('Edm.String', jobStoreFileID)})

    def _dissociateFileFromJob(self, jobStoreFileID):
        entities = self.jobFileIDs.query_entities(filter="RowKey eq '%s'" % jobStoreFileID)
        if entities:
            assert len(entities) == 1
            jobStoreID = entities[0].PartitionKey
            self.jobFileIDs.delete_entity(partition_key=bytes(jobStoreID),
                                          row_key=bytes(jobStoreFileID))

    def _bindTable(self, tableName, create=False):
        for attempt in retry_azure():
            with attempt:
                try:
                    tables = self.tableService.query_tables(table_name=tableName)
                except AzureMissingResourceHttpError as e:
                    if e.status_code != 404:
                        raise
                else:
                    if tables:
                        assert tables[0].name == tableName
                        return AzureTable(self.tableService, tableName)
                if create:
                    self.tableService.create_table(tableName)
                    return AzureTable(self.tableService, tableName)
                else:
                    return None

    def _bindContainer(self, containerName, create=False):
        for attempt in retry_azure():
            with attempt:
                try:
                    self.blobService.get_container_properties(containerName)
                except AzureMissingResourceHttpError as e:
                    if e.status_code == 404:
                        if create:
                            self.blobService.create_container(containerName)
                        else:
                            return None
                    else:
                        raise
                return AzureBlobContainer(self.blobService, containerName)

    def _sanitizeTableName(self, tableName):
        """
        Azure table names must start with a letter and be alphanumeric.

        This will never cause a collision if uuids are used, but
        otherwise may not be safe.
        """
        return 'a' + ''.join([x for x in tableName if x.isalnum()])

    # Maximum bytes that can be in any block of an Azure block blob
    # https://github.com/Azure/azure-storage-python/blob/4c7666e05a9556c10154508335738ee44d7cb104/azure/storage/blob/blobservice.py#L106
    _maxAzureBlockBytes = 4 * 1024 * 1024

    @contextmanager
    def _uploadStream(self, jobStoreFileID, container, checkForModification=False, encrypted=None):
        """
        :param encrypted: True to enforce encryption (will raise exception unless key is set),
        False to prevent encryption or None to encrypt if key is set.
        """
        if checkForModification:
            try:
                expectedVersion = container.get_blob_properties(blob_name=bytes(jobStoreFileID))['etag']
            except AzureMissingResourceHttpError:
                expectedVersion = None

        if encrypted is None:
            encrypted = self.keyPath is not None
        elif encrypted:
            if self.keyPath is None:
                raise RuntimeError('Encryption requested but no key was provided')

        maxBlockSize = self._maxAzureBlockBytes
        if encrypted:
            # There is a small overhead for encrypted data.
            maxBlockSize -= encryption.overhead

        store = self

        class UploadPipe(WritablePipe):
            def readFrom(self, readable):
                blockIDs = []
                try:
                    while True:
                        buf = readable.read(maxBlockSize)
                        if len(buf) == 0:
                            # We're safe to break here even if we never read anything, since
                            # putting an empty block list creates an empty blob.
                            break
                        if encrypted:
                            buf = encryption.encrypt(buf, store.keyPath)
                        blockID = store._newFileID()
                        container.put_block(blob_name=bytes(jobStoreFileID),
                                            block=buf,
                                            blockid=blockID)
                        blockIDs.append(blockID)
                except:
                    with panic(log=logger):
                        # This is guaranteed to delete any uncommitted blocks.
                        container.delete_blob(blob_name=bytes(jobStoreFileID))

                if checkForModification and expectedVersion is not None:
                    # Acquire a (60-second) write lock,
                    leaseID = container.lease_blob(blob_name=bytes(jobStoreFileID),
                                                   x_ms_lease_action='acquire')['x-ms-lease-id']
                    # check for modification,
                    blobProperties = container.get_blob_properties(blob_name=bytes(jobStoreFileID))
                    if blobProperties['etag'] != expectedVersion:
                        container.lease_blob(blob_name=bytes(jobStoreFileID),
                                             x_ms_lease_action='release',
                                             x_ms_lease_id=leaseID)
                        raise ConcurrentFileModificationException(jobStoreFileID)
                    # commit the file,
                    container.put_block_list(blob_name=bytes(jobStoreFileID),
                                             block_list=blockIDs,
                                             x_ms_lease_id=leaseID,
                                             x_ms_meta_name_values=dict(
                                                 encrypted=str(encrypted)))
                    # then release the lock.
                    container.lease_blob(blob_name=bytes(jobStoreFileID),
                                         x_ms_lease_action='release',
                                         x_ms_lease_id=leaseID)
                else:
                    # No need to check for modification, just blindly write over whatever
                    # was there.
                    container.put_block_list(blob_name=bytes(jobStoreFileID),
                                             block_list=blockIDs,
                                             x_ms_meta_name_values=dict(encrypted=str(encrypted)))

        with UploadPipe() as writable:
            yield writable

    @contextmanager
    def _downloadStream(self, jobStoreFileID, container):
        # The reason this is not in the writer is so we catch non-existent blobs early
        blobProps = container.get_blob_properties(blob_name=bytes(jobStoreFileID))
        encrypted = strict_bool(blobProps['x-ms-meta-encrypted'])
        if encrypted and self.keyPath is None:
            raise AssertionError('Content is encrypted but no key was provided.')

        outer_self = self

        class DownloadPipe(ReadablePipe):
            def writeTo(self, writable):
                chunkStart = 0
                fileSize = int(blobProps['Content-Length'])
                while chunkStart < fileSize:
                    chunkEnd = chunkStart + outer_self._maxAzureBlockBytes - 1
                    buf = container.get_blob(blob_name=bytes(jobStoreFileID),
                                             x_ms_range="bytes=%d-%d" % (chunkStart, chunkEnd))
                    if encrypted:
                        buf = encryption.decrypt(buf, outer_self.keyPath)
                    writable.write(buf)
                    chunkStart = chunkEnd + 1

        with DownloadPipe() as readable:
            yield readable
class AzureJobStore(AbstractJobStore): """ A job store that uses Azure's blob store for file storage and Table Service to store job info with strong consistency.""" @classmethod def loadOrCreateJobStore(cls, jobStoreString, config=None, **kwargs): account, namePrefix = jobStoreString.split(':', 1) if '--' in namePrefix: raise ValueError("Invalid name prefix '%s'. Name prefixes may not contain " "%s." % (namePrefix, cls.nameSeparator)) if not cls.containerNameRe.match(namePrefix): raise ValueError("Invalid name prefix '%s'. Name prefixes must contain only digits, " "hyphens or lower-case letters and must not start or end in a " "hyphen." % namePrefix) # Reserve 13 for separator and suffix if len(namePrefix) > cls.maxContainerNameLen - cls.maxNameLen - len(cls.nameSeparator): raise ValueError(("Invalid name prefix '%s'. Name prefixes may not be longer than 50 " "characters." % namePrefix)) if '--' in namePrefix: raise ValueError("Invalid name prefix '%s'. Name prefixes may not contain " "%s." % (namePrefix, cls.nameSeparator)) return cls(account, namePrefix, config=config, **kwargs) # Dots in container names should be avoided because container names are used in HTTPS bucket # URLs where the may interfere with the certificate common name. We use a double # underscore as a separator instead. # containerNameRe = re.compile(r'^[a-z0-9](-?[a-z0-9]+)+[a-z0-9]$') # See https://msdn.microsoft.com/en-us/library/azure/dd135715.aspx # minContainerNameLen = 3 maxContainerNameLen = 63 maxNameLen = 10 nameSeparator = 'xx' # Table names must be alphanumeric # Do not invoke the constructor, use the factory method above. def __init__(self, accountName, namePrefix, config=None, jobChunkSize=maxAzureTablePropertySize): self.jobChunkSize = jobChunkSize self.keyPath = None self.account_key = _fetchAzureAccountKey(accountName) self.accountName = accountName # Table names have strict requirements in Azure self.namePrefix = self._sanitizeTableName(namePrefix) logger.debug("Creating job store with name prefix '%s'" % self.namePrefix) # These are the main API entrypoints. self.tableService = TableService(account_key=self.account_key, account_name=accountName) self.blobService = BlobService(account_key=self.account_key, account_name=accountName) # Register our job-store in the global table for this storage account self.registryTable = self._getOrCreateTable('toilRegistry') exists = self.registryTable.get_entity(row_key=self.namePrefix) self._checkJobStoreCreation(config is not None, exists, accountName + ":" + self.namePrefix) self.registryTable.insert_or_replace_entity(row_key=self.namePrefix, entity={'exists': True}) # Serialized jobs table self.jobItems = self._getOrCreateTable(self.qualify('jobs')) # Job<->file mapping table self.jobFileIDs = self._getOrCreateTable(self.qualify('jobFileIDs')) # Container for all shared and unshared files self.files = self._getOrCreateBlobContainer(self.qualify('files')) # Stats and logging strings self.statsFiles = self._getOrCreateBlobContainer(self.qualify('statsfiles')) # File IDs that contain stats and logging strings self.statsFileIDs = self._getOrCreateTable(self.qualify('statsFileIDs')) super(AzureJobStore, self).__init__(config=config) if self.config.cseKey is not None: self.keyPath = self.config.cseKey # Length of a jobID - used to test if a stats file has been read already or not jobIDLength = len(str(uuid.uuid4())) def qualify(self, name): return self.namePrefix + self.nameSeparator + name def jobs(self): # How many jobs have we done? 
total_processed = 0 for jobEntity in self.jobItems.query_entities_auto(): # Process the items in the page yield AzureJob.fromEntity(jobEntity) total_processed += 1 if total_processed % 1000 == 0: # Produce some feedback for the user, because this can take # a long time on, for example, Azure logger.info("Processed %d total jobs" % total_processed) logger.info("Processed %d total jobs" % total_processed) def create(self, command, memory, cores, disk, preemptable, predecessorNumber=0): jobStoreID = self._newJobID() job = AzureJob(jobStoreID=jobStoreID, command=command, memory=memory, cores=cores, disk=disk, preemptable=preemptable, remainingRetryCount=self._defaultTryCount(), logJobStoreFileID=None, predecessorNumber=predecessorNumber) entity = job.toItem(chunkSize=self.jobChunkSize) entity['RowKey'] = jobStoreID self.jobItems.insert_entity(entity=entity) return job def exists(self, jobStoreID): if self.jobItems.get_entity(row_key=jobStoreID) is None: return False return True def load(self, jobStoreID): jobEntity = self.jobItems.get_entity(row_key=jobStoreID) if jobEntity is None: raise NoSuchJobException(jobStoreID) return AzureJob.fromEntity(jobEntity) def update(self, job): self.jobItems.update_entity(row_key=job.jobStoreID, entity=job.toItem(chunkSize=self.jobChunkSize)) def delete(self, jobStoreID): try: self.jobItems.delete_entity(row_key=jobStoreID) except AzureMissingResourceHttpError: # Job deletion is idempotent, and this job has been deleted already return filterString = "PartitionKey eq '%s'" % jobStoreID for fileEntity in self.jobFileIDs.query_entities(filter=filterString): jobStoreFileID = fileEntity.RowKey self.deleteFile(jobStoreFileID) def deleteJobStore(self): self.registryTable.delete_entity(row_key=self.namePrefix) self.jobItems.delete_table() self.jobFileIDs.delete_table() self.files.delete_container() self.statsFiles.delete_container() self.statsFileIDs.delete_table() def getEnv(self): return dict(AZURE_ACCOUNT_KEY=self.account_key) @classmethod def _readFromUrl(cls, url, writable): blobService, containerName, blobName = cls._extractBlobInfoFromUrl(url) blobService.get_blob_to_file(containerName, blobName, writable) @classmethod def _writeToUrl(cls, readable, url): blobService, containerName, blobName = cls._extractBlobInfoFromUrl(url) blobService.put_block_blob_from_file(containerName, blobName, readable) blobService.get_blob(containerName, blobName) @staticmethod def _extractBlobInfoFromUrl(url): """ :return: (blobService, containerName, blobName) """ def invalidUrl(): raise RuntimeError("The URL '%s' is invalid" % url.geturl()) netloc = url.netloc.split('@') if len(netloc) != 2: invalidUrl() accountEnd = netloc[1].find('.blob.core.windows.net') if accountEnd == -1: invalidUrl() containerName, accountName = netloc[0], netloc[1][0:accountEnd] blobName = url.path[1:] # urlparse always includes a leading '/' blobService = BlobService(account_key=_fetchAzureAccountKey(accountName), account_name=accountName) return blobService, containerName, blobName @classmethod def _supportsUrl(cls, url, export=False): return url.scheme.lower() == 'wasb' or url.scheme.lower() == 'wasbs' def writeFile(self, localFilePath, jobStoreID=None): jobStoreFileID = self._newFileID() self.updateFile(jobStoreFileID, localFilePath) self._associateFileWithJob(jobStoreFileID, jobStoreID) return jobStoreFileID def updateFile(self, jobStoreFileID, localFilePath): with open(localFilePath) as read_fd: with self._uploadStream(jobStoreFileID, self.files) as write_fd: while True: buf = 
read_fd.read(self._maxAzureBlockBytes) write_fd.write(buf) if len(buf) == 0: break def readFile(self, jobStoreFileID, localFilePath): try: with self._downloadStream(jobStoreFileID, self.files) as read_fd: with open(localFilePath, 'w') as write_fd: while True: buf = read_fd.read(self._maxAzureBlockBytes) write_fd.write(buf) if not buf: break except AzureMissingResourceHttpError: raise NoSuchFileException(jobStoreFileID) def deleteFile(self, jobStoreFileID): try: self.files.delete_blob(blob_name=jobStoreFileID) self._dissociateFileFromJob(jobStoreFileID) except AzureMissingResourceHttpError: pass def fileExists(self, jobStoreFileID): # As Azure doesn't have a blob_exists method (at least in the # python API) we just try to download the metadata, and hope # the metadata is small so the call will be fast. try: self.files.get_blob_metadata(blob_name=jobStoreFileID) return True except AzureMissingResourceHttpError: return False @contextmanager def writeFileStream(self, jobStoreID=None): # TODO: this (and all stream methods) should probably use the # Append Blob type, but that is not currently supported by the # Azure Python API. jobStoreFileID = self._newFileID() with self._uploadStream(jobStoreFileID, self.files) as fd: yield fd, jobStoreFileID self._associateFileWithJob(jobStoreFileID, jobStoreID) @contextmanager def updateFileStream(self, jobStoreFileID): with self._uploadStream(jobStoreFileID, self.files, checkForModification=True) as fd: yield fd def getEmptyFileStoreID(self, jobStoreID=None): jobStoreFileID = self._newFileID() self.files.put_blob(blob_name=jobStoreFileID, blob='', x_ms_blob_type='BlockBlob') self._associateFileWithJob(jobStoreFileID, jobStoreID) return jobStoreFileID @contextmanager def readFileStream(self, jobStoreFileID): if not self.fileExists(jobStoreFileID): raise NoSuchFileException(jobStoreFileID) with self._downloadStream(jobStoreFileID, self.files) as fd: yield fd @contextmanager def writeSharedFileStream(self, sharedFileName, isProtected=None): assert self._validateSharedFileName(sharedFileName) sharedFileID = self._newFileID(sharedFileName) with self._uploadStream(sharedFileID, self.files, encrypted=isProtected) as fd: yield fd @contextmanager def readSharedFileStream(self, sharedFileName): assert self._validateSharedFileName(sharedFileName) sharedFileID = self._newFileID(sharedFileName) if not self.fileExists(sharedFileID): raise NoSuchFileException(sharedFileID) with self._downloadStream(sharedFileID, self.files) as fd: yield fd def writeStatsAndLogging(self, statsAndLoggingString): # TODO: would be a great use case for the append blobs, once implemented in the Azure SDK jobStoreFileID = self._newFileID() encrypted = self.keyPath is not None if encrypted: statsAndLoggingString = encryption.encrypt(statsAndLoggingString, self.keyPath) self.statsFiles.put_block_blob_from_text(blob_name=jobStoreFileID, text=statsAndLoggingString, x_ms_meta_name_values=dict( encrypted=str(encrypted))) self.statsFileIDs.insert_entity(entity={'RowKey': jobStoreFileID}) def readStatsAndLogging(self, callback, readAll=False): suffix = '_old' numStatsFiles = 0 for entity in self.statsFileIDs.query_entities(): jobStoreFileID = entity.RowKey hasBeenRead = len(jobStoreFileID) > self.jobIDLength if not hasBeenRead: with self._downloadStream(jobStoreFileID, self.statsFiles) as fd: callback(fd) # Mark this entity as read by appending the suffix self.statsFileIDs.insert_entity(entity={'RowKey': jobStoreFileID + suffix}) self.statsFileIDs.delete_entity(row_key=jobStoreFileID) numStatsFiles += 
1 elif readAll: # Strip the suffix to get the original ID jobStoreFileID = jobStoreFileID[:-len(suffix)] with self._downloadStream(jobStoreFileID, self.statsFiles) as fd: callback(fd) numStatsFiles += 1 return numStatsFiles _azureTimeFormat = "%Y-%m-%dT%H:%M:%SZ" def getPublicUrl(self, jobStoreFileID): try: self.files.get_blob_properties(blob_name=jobStoreFileID) except AzureMissingResourceHttpError: raise NoSuchFileException(jobStoreFileID) # Compensate of a little bit of clock skew startTimeStr = (datetime.utcnow() - timedelta(minutes=5)).strftime(self._azureTimeFormat) endTime = datetime.utcnow() + self.publicUrlExpiration endTimeStr = endTime.strftime(self._azureTimeFormat) sap = SharedAccessPolicy(AccessPolicy(startTimeStr, endTimeStr, BlobSharedAccessPermissions.READ)) sas_token = self.files.generate_shared_access_signature(blob_name=jobStoreFileID, shared_access_policy=sap) return self.files.make_blob_url(blob_name=jobStoreFileID) + '?' + sas_token def getSharedPublicUrl(self, sharedFileName): jobStoreFileID = self._newFileID(sharedFileName) return self.getPublicUrl(jobStoreFileID) def _newJobID(self): # raw UUIDs don't work for Azure property names because the '-' character is disallowed. return str(uuid.uuid4()).replace('-', '_') # A dummy job ID under which all shared files are stored. sharedFileJobID = uuid.UUID('891f7db6-e4d9-4221-a58e-ab6cc4395f94') def _newFileID(self, sharedFileName=None): if sharedFileName is None: ret = str(uuid.uuid4()) else: ret = str(uuid.uuid5(self.sharedFileJobID, str(sharedFileName))) return ret.replace('-', '_') def _associateFileWithJob(self, jobStoreFileID, jobStoreID=None): if jobStoreID is not None: self.jobFileIDs.insert_entity(entity={'PartitionKey': jobStoreID, 'RowKey': jobStoreFileID}) def _dissociateFileFromJob(self, jobStoreFileID): entities = self.jobFileIDs.query_entities(filter="RowKey eq '%s'" % jobStoreFileID) if entities: assert len(entities) == 1 jobStoreID = entities[0].PartitionKey self.jobFileIDs.delete_entity(partition_key=jobStoreID, row_key=jobStoreFileID) def _getOrCreateTable(self, tableName): # This will not fail if the table already exists. for attempt in retry_azure(): with attempt: self.tableService.create_table(tableName) return AzureTable(self.tableService, tableName) def _getOrCreateBlobContainer(self, containerName): for attempt in retry_azure(): with attempt: self.blobService.create_container(containerName) return AzureBlobContainer(self.blobService, containerName) def _sanitizeTableName(self, tableName): """ Azure table names must start with a letter and be alphanumeric. This will never cause a collision if uuids are used, but otherwise may not be safe. """ return 'a' + filter(lambda x: x.isalnum(), tableName) # Maximum bytes that can be in any block of an Azure block blob # https://github.com/Azure/azure-storage-python/blob/4c7666e05a9556c10154508335738ee44d7cb104/azure/storage/blob/blobservice.py#L106 _maxAzureBlockBytes = 4 * 1024 * 1024 @contextmanager def _uploadStream(self, jobStoreFileID, container, checkForModification=False, encrypted=None): """ :param encrypted: True to enforce encryption (will raise exception unless key is set), False to prevent encryption or None to encrypt if key is set. 
""" if checkForModification: try: expectedVersion = container.get_blob_properties(blob_name=jobStoreFileID)['etag'] except AzureMissingResourceHttpError: expectedVersion = None if encrypted is None: encrypted = self.keyPath is not None elif encrypted: if self.keyPath is None: raise RuntimeError('Encryption requested but no key was provided') maxBlockSize = self._maxAzureBlockBytes if encrypted: # There is a small overhead for encrypted data. maxBlockSize -= encryption.overhead readable_fh, writable_fh = os.pipe() with os.fdopen(readable_fh, 'r') as readable: with os.fdopen(writable_fh, 'w') as writable: def reader(): blockIDs = [] try: while True: buf = readable.read(maxBlockSize) if len(buf) == 0: # We're safe to break here even if we never read anything, since # putting an empty block list creates an empty blob. break if encrypted: buf = encryption.encrypt(buf, self.keyPath) blockID = self._newFileID() container.put_block(blob_name=jobStoreFileID, block=buf, blockid=blockID) blockIDs.append(blockID) except: # This is guaranteed to delete any uncommitted # blocks. container.delete_blob(blob_name=jobStoreFileID) raise if checkForModification and expectedVersion is not None: # Acquire a (60-second) write lock, leaseID = container.lease_blob(blob_name=jobStoreFileID, x_ms_lease_action='acquire')['x-ms-lease-id'] # check for modification, blobProperties = container.get_blob_properties(blob_name=jobStoreFileID) if blobProperties['etag'] != expectedVersion: container.lease_blob(blob_name=jobStoreFileID, x_ms_lease_action='release', x_ms_lease_id=leaseID) raise ConcurrentFileModificationException(jobStoreFileID) # commit the file, container.put_block_list(blob_name=jobStoreFileID, block_list=blockIDs, x_ms_lease_id=leaseID, x_ms_meta_name_values=dict( encrypted=str(encrypted))) # then release the lock. container.lease_blob(blob_name=jobStoreFileID, x_ms_lease_action='release', x_ms_lease_id=leaseID) else: # No need to check for modification, just blindly write over whatever # was there. container.put_block_list(blob_name=jobStoreFileID, block_list=blockIDs, x_ms_meta_name_values=dict( encrypted=str(encrypted))) thread = ExceptionalThread(target=reader) thread.start() yield writable # The writable is now closed. This will send EOF to the readable and cause that # thread to finish. thread.join() @contextmanager def _downloadStream(self, jobStoreFileID, container): # The reason this is not in the writer is so we catch non-existant blobs early blobProps = container.get_blob_properties(blob_name=jobStoreFileID) encrypted = strict_bool(blobProps['x-ms-meta-encrypted']) if encrypted and self.keyPath is None: raise AssertionError('Content is encrypted but no key was provided.') readable_fh, writable_fh = os.pipe() with os.fdopen(readable_fh, 'r') as readable: with os.fdopen(writable_fh, 'w') as writable: def writer(): try: chunkStartPos = 0 fileSize = int(blobProps['Content-Length']) while chunkStartPos < fileSize: chunkEndPos = chunkStartPos + self._maxAzureBlockBytes - 1 buf = container.get_blob(blob_name=jobStoreFileID, x_ms_range="bytes=%d-%d" % (chunkStartPos, chunkEndPos)) if encrypted: buf = encryption.decrypt(buf, self.keyPath) writable.write(buf) chunkStartPos = chunkEndPos + 1 finally: # Ensure readers aren't left blocking if this thread crashes. # This close() will send EOF to the reading end and ultimately cause the # yield to return. It also makes the implict .close() done by the enclosing # "with" context redundant but that should be ok since .close() on file # objects are idempotent. 
                        writable.close()

                thread = ExceptionalThread(target=writer)
                thread.start()
                yield readable
                thread.join()
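# A minimal, self-contained sketch of the pipe-plus-thread pattern used by
# _uploadStream/_downloadStream above, with the Azure calls replaced by a plain
# list so it runs against the standard library only. The names fake_put_block
# and blocks are illustrative, not part of any SDK.
import os
import threading
from contextlib import contextmanager

blocks = []  # stands in for the remote blob's committed block list

def fake_put_block(buf):
    blocks.append(buf)

@contextmanager
def upload_stream(max_block_size=4):
    readable_fh, writable_fh = os.pipe()
    with os.fdopen(readable_fh, 'rb') as readable:
        with os.fdopen(writable_fh, 'wb') as writable:
            def reader():
                while True:
                    buf = readable.read(max_block_size)
                    if len(buf) == 0:
                        break  # EOF: the writer side was closed
                    fake_put_block(buf)
            thread = threading.Thread(target=reader)
            thread.start()
            yield writable
        # Leaving the inner 'with' closed the writable end, which sends EOF to
        # the reader thread, so this join is guaranteed to return.
        thread.join()

with upload_stream() as f:
    f.write(b'hello world')
print(blocks)  # e.g. [b'hell', b'o wo', b'rld']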
def getTableService():
    table_service = TableService(account_name=app.config["AZURE_STORAGE_ACCOUNT_NAME"],
                                 account_key=app.config["AZURE_STORAGE_ACCOUNT_KEY"],
                                 is_emulated=app.config["AZURE_STORAGE_ACCOUNT_IS_EMULATED"])
    table_service.create_table('maps')
    return table_service
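# Hypothetical caller for getTableService() above. The entity layout for the
# 'maps' table is an assumption made for illustration; only the service setup
# comes from the snippet itself.
svc = getTableService()
svc.insert_or_replace_entity('maps', {'PartitionKey': 'user1',  # e.g. one partition per owner
                                      'RowKey': 'map-001',
                                      'Title': 'My first map'})
entity = svc.get_entity('maps', 'user1', 'map-001')
print(entity.Title)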
EMPLOYEE_TABLE_NAME = 'employees'

if STORAGE_ACCOUNT_NAME and STORAGE_ACCOUNT_KEY:
    # We have connection details, so use the real table service class.
    from azure.storage.table import TableService
else:
    # No connection has been provided, so create a mock service class
    # that keeps values in memory.
    from mocktableservice import TableService

table_service = TableService(STORAGE_ACCOUNT_NAME, STORAGE_ACCOUNT_KEY)

# Ensure the table exists.
table_service.create_table(EMPLOYEE_TABLE_NAME)

class Employee(object):
    def __init__(self, name, has_a_job):
        self.name = name
        self.has_a_job = has_a_job

    def fix(self):
        self.has_a_job = not self.has_a_job
        table_service.insert_or_replace_entity(EMPLOYEE_TABLE_NAME, {
            "PartitionKey": "_",
            "RowKey": self.name,
            "has_a_job": self.has_a_job,
        })

    @classmethod
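# mocktableservice itself is not shown in this document. A minimal in-memory
# stand-in compatible with the calls above could look like this sketch; the
# real module may differ, everything here is an assumption based on usage.
class MockTableService(object):
    def __init__(self, account_name=None, account_key=None):
        self._tables = {}

    def create_table(self, table_name):
        self._tables.setdefault(table_name, {})

    def insert_or_replace_entity(self, table_name, entity):
        key = (entity['PartitionKey'], entity['RowKey'])
        self._tables[table_name][key] = dict(entity)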
class TableStorageHandler(logging.Handler):
    """
    Handler class which writes log messages to an Azure Storage table.
    """
    MAX_BATCH_SIZE = 100

    def __init__(self,
                 account_name=None,
                 account_key=None,
                 protocol='https',
                 table='logs',
                 batch_size=0,
                 extra_properties=None,
                 partition_key_formatter=None,
                 row_key_formatter=None,
                 is_emulated=False,
                 ):
        """
        Initialize the handler.
        """
        logging.Handler.__init__(self)
        self.service = TableService(account_name=account_name,
                                    account_key=account_key,
                                    is_emulated=is_emulated,
                                    protocol=protocol)
        self.meta = {'hostname': gethostname(), 'process': os.getpid()}
        self.table = _formatName(table, self.meta)
        self.ready = False
        self.rowno = 0
        if not partition_key_formatter:
            # default format for partition keys
            fmt = '%(asctime)s'
            datefmt = '%Y%m%d%H%M'
            partition_key_formatter = logging.Formatter(fmt, datefmt)
        self.partition_key_formatter = partition_key_formatter
        if not row_key_formatter:
            # default format for row keys
            fmt = '%(asctime)s%(msecs)03d-%(hostname)s-%(process)d-%(rowno)02d'
            datefmt = '%Y%m%d%H%M%S'
            row_key_formatter = logging.Formatter(fmt, datefmt)
        self.row_key_formatter = row_key_formatter
        # extra properties and formatters for them
        self.extra_properties = extra_properties
        if extra_properties:
            self.extra_property_formatters = {}
            self.extra_property_names = {}
            for extra in extra_properties:
                if _PY3:
                    f = logging.Formatter(fmt=extra, style=extra[0])
                else:
                    f = logging.Formatter(fmt=extra)
                self.extra_property_formatters[extra] = f
                self.extra_property_names[extra] = self._getFormatName(extra)
        # the storage emulator doesn't support batch operations
        if batch_size <= 1 or is_emulated:
            self.batch = None
        else:
            self.batch = TableBatch()
            if batch_size > TableStorageHandler.MAX_BATCH_SIZE:
                self.batch_size = TableStorageHandler.MAX_BATCH_SIZE
            else:
                self.batch_size = batch_size
        if self.batch:
            self.current_partition_key = None

    def _copyLogRecord(self, record):
        copy = logging.makeLogRecord(record.__dict__)
        copy.exc_info = None
        copy.exc_text = None
        if _PY3:
            copy.stack_info = None
        return copy

    def _getFormatName(self, extra):
        name = extra
        style = extra[0]
        if style == '%':
            name = extra[2:extra.index(')')]
        elif _PY3:
            if style == '{':
                name = next(string.Formatter().parse(extra))[1]
            elif style == '$':
                name = extra[1:]
                if name.startswith('{'):
                    name = name[1:-1]
        return name

    def emit(self, record):
        """
        Emit a record.

        Format the record and send it to the specified table.
""" try: if not self.ready: self.service.create_table(self.table) self.ready = True # generate partition key for the entity record.hostname = self.meta['hostname'] copy = self._copyLogRecord(record) partition_key = self.partition_key_formatter.format(copy) # ensure entities in the batch all have the same patition key if self.batch: if self.current_partition_key is not None: if partition_key != self.current_partition_key: self.flush() self.current_partition_key = partition_key # add log message and extra properties to the entity entity = {} if self.extra_properties: for extra in self.extra_properties: formatter = self.extra_property_formatters[extra] name = self.extra_property_names[extra] entity[name] = formatter.format(copy) entity['message'] = self.format(record) # generate row key for the entity copy.rowno = self.rowno row_key = self.row_key_formatter.format(copy) # add entitiy to the table entity['PartitionKey'] = partition_key entity['RowKey'] = row_key if not self.batch: self.service.insert_or_replace_entity(self.table, entity) else: self.batch.insert_or_replace_entity(entity) # commit the ongoing batch if it reaches the high mark self.rowno += 1 if self.rowno >= self.batch_size: self.flush() except (KeyboardInterrupt, SystemExit): raise except: self.handleError(record) def flush(self): """ Ensure all logging output has been flushed. """ if self.batch and self.rowno > 0: try: self.service.commit_batch(self.table, self.batch) finally: self.rowno = 0 self.batch = TableBatch() def setFormatter(self, fmt): """ Set the message formatter. """ super(TableStorageHandler, self).setFormatter(fmt) if self.extra_properties: logging._acquireLock() try: for extra in self.extra_property_formatters.values(): extra.converter = fmt.converter extra.datefmt = fmt.datefmt if _PY3: extra.default_time_format = fmt.default_time_format extra.default_msec_format = fmt.default_msec_format finally: logging._releaseLock() def setPartitionKeyFormatter(self, fmt): """ Set the partition key formatter. """ self.partition_key_formatter = fmt def setRowKeyFormatter(self, fmt): """ Set the row key formatter. """ self.row_key_formatter = fmt
stor_acc_name = service_keys['stor_acc_name']
stor_acc_key = service_keys['stor_acc_key']
redis_pass = service_keys['redis_pass']
redis_server = service_keys['redis_server']
instr_key = service_keys['instr_key']

# storage
account_name = stor_acc_name
account_key = stor_acc_key

blob_service = BlobService(account_name, account_key)
blob_service.create_container('images')

queue_service = QueueService(account_name, account_key)
queue_service.create_queue('taskqueue')

table_service = TableService(account_name, account_key)
table_service.create_table('tasktable')

r = redis.StrictRedis(host=redis_server, port=6380, db=0,
                      password=redis_pass, ssl=True)

tc = TelemetryClient(instr_key)

@app.route('/')
@app.route('/home')
def form():
    return render_template('form_submit.html')

@app.route('/hello/', methods=['POST'])
def hello():
    mobile = request.form['yourmobile']
invoke (no sudo required):
    python azure_sense.py
"""
import time
from datetime import datetime

from sense_hat import SenseHat
from azure.storage.table import TableService

__author__ = "Anatoly Mironov @mirontoli"

sense = SenseHat()

table_service = TableService(account_name='tolle',
                             account_key='ho2zakf/8rmDckS3pGOTPWwIwCzNwVJxd5hDb3R15wms2fZJG/aX53PDsTWBYsuTPwF7802IKk2QcrJ5FO7i6w==')

table_name = 'climateData'
partition_key = 'climate2'

table_service.create_table(table_name, False)

while True:
    sense.show_letter('S', text_colour=[0, 114, 198])
    date = datetime.now()
    iso_date = date.isoformat()
    raw_temp = sense.temp
    # Calibrate the temperature reading, which is skewed by heat from the Pi itself:
    # https://www.raspberrypi.org/forums/viewtopic.php?t=111457&p=769672
    calctemp = 0.0071 * raw_temp * raw_temp + 0.86 * raw_temp - 10.0
    temp = "{0:.2f}".format(calctemp)
    humidity = "{0:.2f}".format(sense.humidity)
    pressure = "{0:.2f}".format(sense.pressure)
    entry = {'PartitionKey': partition_key, 'RowKey': iso_date,
             'Temperature': temp, 'Humidity': humidity, 'Pressure': pressure}
    table_service.insert_entity(table_name, entry)
    time.sleep(2)
    sense.clear()
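# A companion read-side sketch, not part of the original script: pull the
# stored readings back out of the climateData table using the same service
# object and partition key as above.
entries = table_service.query_entities(table_name,
                                       "PartitionKey eq '%s'" % partition_key)
for e in entries:
    print(e.RowKey, e.Temperature, e.Humidity, e.Pressure)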
class Repository(object):
    """Azure Table Storage repository."""

    def __init__(self, settings):
        """Initializes the repository with the specified settings dict.
        Required settings are:
         - STORAGE_NAME
         - STORAGE_KEY
         - STORAGE_TABLE_POLL
         - STORAGE_TABLE_CHOICE
        """
        self.name = 'Azure Table Storage'
        self.storage_name = settings['STORAGE_NAME']
        self.storage_key = settings['STORAGE_KEY']
        self.poll_table = settings['STORAGE_TABLE_POLL']
        self.choice_table = settings['STORAGE_TABLE_CHOICE']

        self.svc = TableService(self.storage_name, self.storage_key)
        self.svc.create_table(self.poll_table)
        self.svc.create_table(self.choice_table)

    def get_polls(self):
        """Returns all the polls from the repository."""
        poll_entities = self.svc.query_entities(self.poll_table)
        polls = [_poll_from_entity(entity) for entity in poll_entities]
        return polls

    def get_poll(self, poll_key):
        """Returns a poll from the repository."""
        try:
            partition, row = _key_to_partition_and_row(poll_key)
            poll_entity = self.svc.get_entity(self.poll_table, partition, row)
            choice_entities = self.svc.query_entities(
                self.choice_table,
                "PollPartitionKey eq '{0}' and PollRowKey eq '{1}'"
                .format(partition, row)
            )

            poll = _poll_from_entity(poll_entity)
            poll.choices = [_choice_from_entity(choice_entity)
                            for choice_entity in choice_entities]
            return poll
        except AzureMissingResourceHttpError:
            raise PollNotFound()

    def increment_vote(self, poll_key, choice_key):
        """Increment the choice vote count for the specified poll."""
        try:
            partition, row = _key_to_partition_and_row(choice_key)
            entity = self.svc.get_entity(self.choice_table, partition, row)
            entity.Votes += 1
            self.svc.update_entity(self.choice_table, entity)
        except AzureMissingResourceHttpError:
            raise PollNotFound()

    def add_sample_polls(self):
        """Adds a set of polls from data stored in a samples.json file."""
        poll_partition = '2014'
        poll_row = 0
        choice_partition = '2014'
        choice_row = 0

        for sample_poll in _load_samples_json():
            poll_entity = {
                'PartitionKey': poll_partition,
                'RowKey': str(poll_row),
                'Text': sample_poll['text'],
            }
            self.svc.insert_entity(self.poll_table, poll_entity)

            for sample_choice in sample_poll['choices']:
                choice_entity = {
                    'PartitionKey': choice_partition,
                    'RowKey': str(choice_row),
                    'Text': sample_choice,
                    'Votes': 0,
                    'PollPartitionKey': poll_partition,
                    'PollRowKey': str(poll_row),
                }
                self.svc.insert_entity(self.choice_table, choice_entity)
                choice_row += 1

            poll_row += 1
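# Hypothetical wiring for the Repository above; the settings keys mirror its
# docstring and the values are placeholders. What get_polls() yields depends
# on _poll_from_entity, which is defined elsewhere in the app.
repo = Repository({
    'STORAGE_NAME': 'mystorageaccount',
    'STORAGE_KEY': '<storage key>',
    'STORAGE_TABLE_POLL': 'polls',
    'STORAGE_TABLE_CHOICE': 'choices',
})
repo.add_sample_polls()
for poll in repo.get_polls():
    print(poll)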
import os
import uuid
import json
import random
import urllib.request

# Imports assumed for the azure-storage SDK in use by this snippet.
from azure.storage.table import TableService
from azure.storage.file import FileService
from azure.storage.blob import BlockBlobService

STORAGE_ACCOUNT_NAME = os.environ['STORAGE_ACCOUNT_NAME']
STORAGE_ACCOUNT_KEY = os.environ['STORAGE_ACCOUNT_KEY']

table_service = TableService(account_name=STORAGE_ACCOUNT_NAME,
                             account_key=STORAGE_ACCOUNT_KEY)
file_service = FileService(account_name=STORAGE_ACCOUNT_NAME,
                           account_key=STORAGE_ACCOUNT_KEY)
block_blob_service = BlockBlobService(account_name=STORAGE_ACCOUNT_NAME,
                                      account_key=STORAGE_ACCOUNT_KEY)
block_blob_service.create_container('telemetry')
table_service.create_table('cycles')

databricks_url = os.environ['DATABRICKS_URL']
FEATURIZER_JAR_URL = os.environ['FEATURIZER_JAR_URL']
access_token = os.environ['DATABRICKS_TOKEN']
IOT_HUB_NAME = os.environ['IOT_HUB_NAME']
EVENT_HUB_ENDPOINT = os.environ['EVENT_HUB_ENDPOINT']

StorageConnectionString = ("DefaultEndpointsProtocol=https;AccountName=" + STORAGE_ACCOUNT_NAME +
                           ";AccountKey=" + STORAGE_ACCOUNT_KEY + ";EndpointSuffix=core.windows.net")

bearer_token = 'Bearer ' + access_token
json_data = {'Authorization': bearer_token}  # request headers for the Databricks REST API

# Fetch the featurizer jar into the local jars folder.
url = FEATURIZER_JAR_URL + '/featurizer_2.11-1.0.jar'
urllib.request.urlretrieve(url, 'D:/home/site/jars/featurizer_2.11-1.0.jar')
# upload jar
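# The snippet above prepares an Authorization header for the Databricks REST
# API, but the call itself is out of frame. A request against the 2.0 jobs
# endpoint might look like this; the endpoint choice is an assumption made
# for illustration.
import requests

response = requests.get(databricks_url + '/api/2.0/jobs/list', headers=json_data)
print(response.status_code)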
class az(object):
    def __init__(self, default_table_name=DEFAULT_TABLE, partitionKey='default'):
        self.TABLE_STORAGE_KEY = os.getenv('AZURE_STORAGE_KEY')
        self.STORAGE_NAME = os.getenv('STORAGE_NAME')
        self.default_table_name = default_table_name
        self.default_partition = partitionKey
        if self.TABLE_STORAGE_KEY is None:
            # Fall back to the credentials kept in tokens.py
            from tokens import TABLE_STORAGE_ACCESS_KEY, STORAGE_ACCOUNT_NAME
            self.TABLE_STORAGE_KEY = TABLE_STORAGE_ACCESS_KEY
            self.STORAGE_NAME = STORAGE_ACCOUNT_NAME
        self.table_service = TableService(account_name=self.STORAGE_NAME,
                                          account_key=self.TABLE_STORAGE_KEY)
        #create_table_if_does_not_exists(self.default_table_name)

    def insert_or_replace_entity_to_azure(self, rowKey, entry, t_name=DEFAULT_TABLE):
        '''
        Builds an Entity from the given entry dict and upserts it into
        Azure table storage.
        '''
        segment = Entity()
        segment.PartitionKey = self.default_partition
        segment.RowKey = str(rowKey).zfill(8)
        segment.latA = str(entry['latA'])
        segment.longA = str(entry['longA'])
        segment.latB = str(entry['latB'])
        segment.longB = str(entry['longB'])
        segment.colorKey = str(entry['color'])
        #print segment.colorKey

        if os.name == 'nt':
            # The os.name check apparently distinguishes two SDK versions in
            # use; the older signature takes explicit keys.
            self.table_service.insert_or_replace_entity(t_name,
                                                        self.default_partition,
                                                        str(rowKey).zfill(8),
                                                        segment)
        else:
            self.table_service.insert_or_replace_entity(t_name, segment)

    def create_table(self, name):
        return self.table_service.create_table(name)

    def delete_table(self, name):
        return self.table_service.delete_table(name)

    def delete_entity_by_rowKey(self, rowKey, table_name=DEFAULT_TABLE):
        return self.table_service.delete_entity(table_name, self.default_partition, rowKey)

    def does_table_exist(self, table_name):
        if os.name == 'nt':
            for i in self.table_service.query_tables():
                if i.name == table_name:
                    return True
        else:
            for i in self.table_service.list_tables():
                if i.name == table_name:
                    return True
        return False

    def list_tables(self):
        if os.name == 'nt':
            for j in self.table_service.query_tables():
                print(j.name)
        else:
            for j in self.table_service.list_tables():
                print(j.name)

    def create_table_if_does_not_exist(self, table_name=DEFAULT_TABLE):
        if self.does_table_exist(table_name):
            return 'already exists'
        else:
            self.table_service.create_table(table_name)

    def create_entry(self, latA, lonA, latB, lonB, bumpiness):
        return {
            'latA': latA,
            'longA': lonA,
            'latB': latB,
            'longB': lonB,
            'color': bumpiness,
        }

    def create_random_entry(self):
        return {
            'latA': random.uniform(37, 38),
            'longA': random.uniform(-122, -123),
            'latB': random.uniform(37, 38),
            'longB': random.uniform(-122, -123),
            'color': random.randint(0, 7),
        }

    def create_and_insert_or_replace_entity_azure(self, latA, lonA, latB, lonB, bumpiness,
                                                  rowKey, table_name=DEFAULT_TABLE):
        return self.insert_or_replace_entity_to_azure(rowKey,
                                                      self.create_entry(latA, lonA, latB,
                                                                        lonB, bumpiness),
                                                      table_name)
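# Illustrative use of the az helper above; assumes AZURE_STORAGE_KEY and
# STORAGE_NAME are set in the environment (or tokens.py is available).
# Ensures the default table exists, then upserts a few random segments.
store = az()
store.create_table_if_does_not_exist()
for row_key in range(5):
    store.insert_or_replace_entity_to_azure(row_key, store.create_random_entry())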