def get_blob_client_by_uri(storage_uri):
    """Build a blob client for the storage location described by ``storage_uri``.

    ``StorageUtilities.get_storage_from_uri`` resolves the URI into a
    container name, a storage account name and an account key.
    ``create_container`` is called for that container before returning.

    :param storage_uri: URI identifying the storage account and container.
    :return: ``(blob_service, container_name)`` tuple.
    """
    container, account, account_key = StorageUtilities.get_storage_from_uri(storage_uri)
    client = BlockBlobService(account_name=account, account_key=account_key)
    client.create_container(container)
    return client, container
class BlobUploader(object):
    """Uploads files to an Azure blob container."""

    def __init__(self, blob_container=None, make_container_public=False):
        """
        Set up the blob service connection used for uploads.

        :param blob_container: container to upload into; BLOB_CONTAINER is
            used when this is empty or omitted.
        :param make_container_public: currently has no effect, because the
            container-creation code that consulted it is disabled.
        """
        if not blob_container:
            blob_container = BLOB_CONTAINER
        self.blob_container = blob_container
        self.blob_service = BlockBlobService(
            account_name=BLOB_ACCOUNTNAME,
            account_key=BLOB_ACCOUNTKEY,
        )
        # NOTE: creating the container (optionally with public read access)
        # used to happen here but is disabled; nothing in this class creates
        # the container.

    def put_json_file(self, file_obj, filename):
        """
        Upload a file to the blob store under the blob name ``filename``.

        The caller chooses the naming scheme, for example
        <prefix>/YYYYMMDD.json.  The data is read from the path
        ``file_obj.name``.
        """
        file_obj.seek(0)
        json_settings = ContentSettings(content_type="text/json")
        self.blob_service.create_blob_from_path(
            self.blob_container,
            filename,
            file_obj.name,
            content_settings=json_settings,
        )
def test_sas_signed_identifier(self):
    """Read a blob through a SAS token that refers to a stored access policy."""
    # The SAS is computed from the storage key, so this only runs live.
    if TestMode.need_recording_file(self.test_mode):
        return

    # Arrange
    blob_name = self._create_block_blob()

    policy = AccessPolicy()
    policy.start = '2011-10-11'
    policy.expiry = '2018-10-12'
    policy.permission = BlobPermissions.READ
    self.bs.set_container_acl(self.container_name, {'testid': policy})

    sas = self.bs.generate_blob_shared_access_signature(
        self.container_name, blob_name, id='testid')

    # Act
    sas_service = BlockBlobService(
        self.settings.STORAGE_ACCOUNT_NAME,
        sas_token=sas,
        request_session=requests.Session(),
    )
    self._set_test_proxy(sas_service, self.settings)
    downloaded = sas_service.get_blob_to_bytes(self.container_name, blob_name)

    # Assert
    self.assertEqual(self.byte_data, downloaded.content)
def test_sas_access_blob(self):
    """Read a blob through a one-hour, read-only blob SAS token."""
    # The SAS is computed from the storage key, so this only runs live.
    if TestMode.need_recording_file(self.test_mode):
        return

    # Arrange
    blob_name = self._create_block_blob()
    sas = self.bs.generate_blob_shared_access_signature(
        self.container_name,
        blob_name,
        permission=BlobPermissions.READ,
        expiry=datetime.utcnow() + timedelta(hours=1),
    )

    # Act
    sas_service = BlockBlobService(
        self.settings.STORAGE_ACCOUNT_NAME,
        sas_token=sas,
        request_session=requests.Session(),
    )
    self._set_test_proxy(sas_service, self.settings)
    downloaded = sas_service.get_blob_to_bytes(self.container_name, blob_name)

    # Assert
    self.assertEqual(self.byte_data, downloaded.content)
def update(self, area, selector, content_type, buffer):
    """Upload ``buffer`` as the blob ``selector`` of the given area.

    The area configuration supplies the storage account and the container.
    The base64-encoded MD5 digest of ``buffer`` is stored as the blob's
    content MD5 and is also the return value.

    :param area: area name; callers must have validated it already.
    :param selector: blob name to write.
    :param content_type: content type to set; skipped when None or empty.
    :param buffer: bytes to upload.
    :return: base64-encoded MD5 digest of ``buffer``.
    """
    assert area is not None, 'area is none; should already be validated'

    area_config = config.load_area(area)
    storage_config = config.load_storage(area_config['storage'])
    # The lowered name is not used below; the call is kept as in the original.
    area = area.lower()

    # For HTTP tracing while debugging, set
    # http.client.HTTPConnection.debuglevel = 1 (httplib on Python 2).
    service = BlockBlobService(
        account_name=storage_config['name'],
        account_key=storage_config['key1'],
    )

    digest = base64.b64encode(hashlib.md5(buffer).digest())
    settings = ContentSettings(content_md5=digest)
    if content_type:
        settings.content_type = content_type

    service.create_blob_from_bytes(
        area_config['container'],
        selector,
        buffer,
        content_settings=settings,
        validate_content=False,
    )
    return digest
class azureobject(object):
    """Thin wrapper around one Azure blob container that hands out azurekey objects."""

    def __init__(self, azure_config):
        """Connect using the 'account name', 'account key' and 'container' settings.

        :param azure_config: mapping holding the three settings above.
        :raises Exception: if any of the three is missing or None.
        """
        required = ('account name', 'account key', 'container')
        if not all(azure_config.get(setting) is not None for setting in required):
            raise Exception("Cannot connect to Azure without account name, account key, and container specified")
        self.conn = BlockBlobService(
            account_name=azure_config['account name'],
            account_key=azure_config['account key'],
        )
        self.container = azure_config['container']

    def get_key(self, key_name):
        """Return an azurekey for ``key_name`` with ``does_exist`` set.

        Properties are fetched only when the key exists.
        """
        key = azurekey(self, key_name, load=False)
        if not key.exists():
            key.does_exist = False
            return key
        key.get_properties()
        key.does_exist = True
        return key

    def search_key(self, key_name):
        """Return an azurekey for the blob named exactly ``key_name``, or None."""
        listing = self.conn.list_blobs(self.container, prefix=key_name, delimiter='/')
        for entry in listing:
            if entry.name == key_name:
                return azurekey(self, entry.name)
        return None

    def list_keys(self, prefix):
        """Return azurekey objects for everything listed under ``prefix`` ('/'-delimited)."""
        listing = self.conn.list_blobs(self.container, prefix=prefix, delimiter='/')
        return [azurekey(self, entry.name) for entry in listing]
def sas_with_signed_identifiers(self):
    """Sample: read a blob with a container SAS that refers to a stored access policy."""
    container_name = self._create_container()
    self.service.create_blob_from_text(container_name, 'blob1', b'hello world')

    # Store a read-only policy, valid for one hour, on the container.
    read_policy = AccessPolicy(
        permission=ContainerPermissions.READ,
        expiry=datetime.utcnow() + timedelta(hours=1),
    )
    self.service.set_container_acl(container_name, {'id': read_policy})

    # Give the ACL 30 seconds to propagate.
    time.sleep(30)

    # Passing only the policy id tells the service to use the stored policy.
    sas = self.service.generate_container_shared_access_signature(
        container_name, id='id')

    # Build a second client that authenticates with the SAS alone.
    sas_service = BlockBlobService(
        account_name=self.account.account_name,
        sas_token=sas,
    )
    blob = sas_service.get_blob_to_text(container_name, 'blob1')
    content = blob.content  # hello world

    self.service.delete_container(container_name)
def get_blob_client_by_uri(storage_uri, session):
    """Build a token-authenticated blob client for ``storage_uri``.

    ``StorageUtilities.get_storage_from_uri`` resolves the URI (using
    ``session``) into an object carrying the account name, token, container
    name and file prefix.  ``create_container`` is called for that container
    before returning.

    :param storage_uri: URI identifying the storage location.
    :param session: passed through to ``StorageUtilities.get_storage_from_uri``.
    :return: ``(blob_service, container_name, file_prefix)`` tuple.
    """
    location = StorageUtilities.get_storage_from_uri(storage_uri, session)
    client = BlockBlobService(
        account_name=location.storage_name,
        token_credential=location.token,
    )
    client.create_container(location.container_name)
    return client, location.container_name, location.file_prefix
def request_session(self):
    """Sample: the two ways of giving a client a custom ``requests`` session."""
    # A custom session is the hook for special network options.
    custom_session = requests.Session()

    # Option 1: hand it to the constructor.
    client = BlockBlobService(
        account_name='<account_name>',
        account_key='<account_key>',
        request_session=custom_session,
    )

    # Option 2: assign it on an already constructed client.
    client = BlockBlobService(
        account_name='<account_name>',
        account_key='<account_key>',
    )
    client.request_session = custom_session
def protocol(self):
    """Sample: the two ways of switching a client from https to http."""
    # https is the default and is strongly recommended for security;
    # http remains available when it is really wanted.

    # Option 1: choose the protocol in the constructor.
    client = BlockBlobService(
        account_name='<account_name>',
        account_key='<account_key>',
        protocol='http',
    )

    # Option 2: change it on an already constructed client.
    client = BlockBlobService(
        account_name='<account_name>',
        account_key='<account_key>',
    )
    client.protocol = 'http'
def store(self):
    """Yield an AzureBlockBlobStore on a uniquely named container, then delete the container."""
    from azure.storage.blob import BlockBlobService

    container_name = uuid()
    connection = create_azure_conn_string(load_azure_credentials())
    # Separate client, used only to remove the container afterwards.
    cleanup_service = BlockBlobService(connection_string=connection)

    yield AzureBlockBlobStore(
        conn_string=connection,
        container=container_name,
        public=False,
    )

    cleanup_service.delete_container(container_name)
def test_get_put_blob(self):
    """Upload 'sunset.png' to container 'cont2' and download it again as 'out-sunset.png'."""
    import config

    service = BlockBlobService(
        config.STORAGE_ACCOUNT_NAME,
        config.STORAGE_ACCOUNT_KEY,
    )

    # Round trip: local file -> blob -> different local file.
    service.create_blob_from_path('cont2', 'sunset.png', 'sunset.png')
    service.get_blob_to_path('cont2', 'sunset.png', 'out-sunset.png')
def block_blob_service(self):
    """Return a BlockBlobService for ``self.conn_string``.

    When ``self.create_if_missing`` is set, ``create_container`` is called
    for ``self.container`` first, with container-level public access if
    ``self.public`` is set and no public access otherwise.
    """
    from azure.storage.blob import BlockBlobService, PublicAccess

    service = BlockBlobService(connection_string=self.conn_string)
    if not self.create_if_missing:
        return service

    if self.public:
        access_level = PublicAccess.Container
    else:
        access_level = None
    service.create_container(self.container, public_access=access_level)
    return service
class _BlobStorageFileHandler(object):
    """Ships log files to an Azure blob container, optionally zip-compressed."""

    def __init__(self,
                 account_name=None,
                 account_key=None,
                 protocol='https',
                 container='logs',
                 zip_compression=False,
                 max_connections=1,
                 max_retries=5,
                 retry_wait=1.0,
                 is_emulated=False):
        """
        :param account_name: storage account name.
        :param account_key: storage account key.
        :param protocol: protocol passed to the blob service.
        :param container: container name; may contain ``%(hostname)s`` and
            ``%(process)d`` placeholders, and is lower-cased after expansion.
        :param zip_compression: zip each file before uploading it.
        :param max_connections: passed to ``create_blob_from_path``.
        :param max_retries: stored only; not used by the upload call.
        :param retry_wait: stored only; not used by the upload call.
        :param is_emulated: passed to the blob service.
        """
        self.service = BlockBlobService(account_name=account_name,
                                        account_key=account_key,
                                        is_emulated=is_emulated,
                                        protocol=protocol)
        self.container_created = False
        hostname = gethostname()
        # The container name is formatted with '_' replaced by '-' in the
        # hostname; the raw hostname is restored in the metadata afterwards.
        self.meta = {'hostname': hostname.replace('_', '-'),
                     'process': os.getpid()}
        self.container = (container % self.meta).lower()
        self.meta['hostname'] = hostname
        self.zip_compression = zip_compression
        self.max_connections = max_connections
        self.max_retries = max_retries
        self.retry_wait = retry_wait

    def put_file_into_storage(self, dirName, fileName):
        """
        Ship the outdated log file to the specified blob container.

        The container is created on the first call.  With zip compression
        enabled the file is zipped into a temporary file, which is uploaded
        as ``fileName + '.zip'`` and removed afterwards whether or not the
        upload succeeded.
        """
        if not self.container_created:
            self.service.create_container(self.container)
            self.container_created = True
        tmpfile_path = ''
        try:
            file_path = os.path.join(dirName, fileName)
            if self.zip_compression:
                suffix, content_type = '.zip', 'application/zip'
                fd, tmpfile_path = mkstemp(suffix=suffix)
                with os.fdopen(fd, 'wb') as f:
                    with ZipFile(f, 'w', ZIP_DEFLATED) as z:
                        z.write(file_path, arcname=fileName)
                file_path = tmpfile_path
            else:
                suffix, content_type = '', 'text/plain'
            # max_retries and retry_wait are no longer arguments in azure 0.33
            self.service.create_blob_from_path(
                container_name=self.container,
                blob_name=fileName + suffix,
                file_path=file_path,
                content_settings=ContentSettings(content_type=content_type),
                max_connections=self.max_connections)
        finally:
            # Decide on the path, not on the descriptor: mkstemp may return
            # descriptor 0 (e.g. in a daemon whose stdin is closed), and a
            # truthiness test on it would leave the temporary file behind.
            if tmpfile_path:
                os.remove(tmpfile_path)
def upload_file(STORAGE_NAME, STORAGE_KEY, NEW_CONTAINER_NAME, file, path, extension, content_type):
    """Create a blob service and upload one file to a container.

    A status line is printed either way; upload errors are reported, not raised.

    :param STORAGE_NAME: storage account name.
    :param STORAGE_KEY: storage account key.
    :param NEW_CONTAINER_NAME: container to upload into.
    :param file: blob name to create.
    :param path: local path of the file to upload.
    :param extension: appended to ``content_type`` to form the blob's content type.
    :param content_type: content type prefix.
    """
    blob_service = BlockBlobService(account_name=STORAGE_NAME, account_key=STORAGE_KEY)
    try:
        blob_service.create_blob_from_path(
            NEW_CONTAINER_NAME,
            file,
            path,
            content_settings=ContentSettings(content_type=content_type + extension))
    except Exception as error:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate; the reason is reported as well.
        print("{} // BLOB upload status: failed".format(file))
        print("{} // BLOB upload error: {}".format(file, error))
    else:
        print("{} // BLOB upload status: successful".format(file))
def make_public_container(STORAGE_NAME, STORAGE_KEY, NEW_CONTAINER_NAME):
    """Create a blob container and set it to public (container-level) access.

    The ACL is applied whether or not ``create_container`` reported that it
    created the container; the printed message reflects only that result.

    :param STORAGE_NAME: storage account name.
    :param STORAGE_KEY: storage account key.
    :param NEW_CONTAINER_NAME: name of the container to create.
    :return: the blob service that was created.
    """
    blob_service = BlockBlobService(account_name=STORAGE_NAME, account_key=STORAGE_KEY)
    new_container_status = blob_service.create_container(NEW_CONTAINER_NAME)
    blob_service.set_container_acl(NEW_CONTAINER_NAME, public_access=PublicAccess.Container)
    if new_container_status:
        print('{} BLOB container has been successfully created: {}'.format(NEW_CONTAINER_NAME, new_container_status))
    else:
        print('{} something went wrong: check parameters and subscription'.format(NEW_CONTAINER_NAME))
    # The docstring always promised the service back, but it was never returned.
    return blob_service
def test_create_container_with_public_access_container(self):
    """A container created with 'container' public access can be listed anonymously."""
    # Arrange
    container_name = self._get_container_reference()

    # Act
    was_created = self.bs.create_container(container_name, None, 'container')
    # A client built without a key or SAS, i.e. anonymous access.
    anonymous = BlockBlobService(self.settings.STORAGE_ACCOUNT_NAME)

    # Assert
    self.assertTrue(was_created)
    anonymous.list_blobs(container_name)
def test_create_container_with_public_access_blob(self):
    """A blob in a container created with 'blob' public access can be read anonymously."""
    # Arrange
    container_name = self._get_container_reference()

    # Act
    was_created = self.bs.create_container(container_name, None, 'blob')
    self.bs.create_blob_from_text(container_name, 'blob1', u'xyz')
    # A client built without a key or SAS, i.e. anonymous access.
    anonymous = BlockBlobService(self.settings.STORAGE_ACCOUNT_NAME)

    # Assert
    self.assertTrue(was_created)
    anonymous.get_blob_to_text(container_name, 'blob1')
def prepare(self, area):
    """Create the area's container and give it container-level public access.

    :param area: area name; callers must have validated it already.
    """
    assert area is not None, 'area is none; should already be validated'

    area_config = config.load_area(area)
    storage_config = config.load_storage(area_config['storage'])
    container = area_config['container']

    service = BlockBlobService(
        account_name=storage_config['name'],
        account_key=storage_config['key1'],
    )
    service.create_container(container)
    service.set_container_acl(container, public_access=PublicAccess.Container)
def store(self):
    """Yield an extended-keyspace Azure store on a uniquely named container, then delete the container."""
    from azure.storage.blob import BlockBlobService

    class ExtendedKeysStore(ExtendedKeyspaceMixin, AzureBlockBlobStore):
        pass

    container_name = uuid()
    connection = create_azure_conn_string(load_azure_credentials())
    # Separate client, used only to remove the container afterwards.
    cleanup_service = BlockBlobService(connection_string=connection)

    yield ExtendedKeysStore(
        conn_string=connection,
        container=container_name,
        public=False,
    )

    cleanup_service.delete_container(container_name)
def get_wav_file(account, item):
    """Download a blob from the 'audio' container and return it as an in-memory stream.

    The blob name is ``item`` with its first six characters dropped.

    :param account: storage account name (no key or SAS is supplied).
    :param item: identifier whose tail, from index 6 on, is the blob name.
    :return: ``BytesIO`` wrapping the blob content.
    """
    service = BlockBlobService(account_name=account)

    # note: code currently sends 'audio'; should I split, or hardcode?
    blob_name = item[6:]
    downloaded = service.get_blob_to_bytes('audio', blob_name)
    return BytesIO(downloaded.content)
def test_azure_setgetstate():
    """A pickled and unpickled AzureBlockBlobStore can still read what was stored before."""
    from azure.storage.blob import BlockBlobService

    container = uuid()
    conn_string = create_azure_conn_string(load_azure_credentials())
    s = BlockBlobService(connection_string=conn_string)
    try:
        store = AzureBlockBlobStore(conn_string=conn_string, container=container)
        store.put(u'key1', b'value1')

        # Round-trip the store itself through pickle (protocol 2).
        buf = pickle.dumps(store, protocol=2)
        store = pickle.loads(buf)

        assert store.get(u'key1') == b'value1'
    finally:
        # Remove the test container even when a step above fails, so failed
        # runs do not leave containers behind.
        s.delete_container(container)
def initialize_backend():
    """Initialise the module-level blob service, container name and timeout from the environment.

    The account name and key are read with ``getenv_required``; the
    container and timeout fall back to their module defaults.
    ``create_container`` is then called with ``fail_on_exist=False``.
    """
    global _blob_service
    global _container
    global _timeout

    account_name = getenv_required(_ENV_ACCOUNT_NAME)
    account_key = getenv_required(_ENV_ACCOUNT_KEY)
    _blob_service = BlockBlobService(account_name=account_name, account_key=account_key)

    _container = getenv(_ENV_CONTAINER, _DEFAULT_CONTAINER)
    _timeout = getenv_int(_ENV_TIMEOUT, _DEFAULT_TIMEOUT)

    _blob_service.create_container(_container, fail_on_exist=False, timeout=_timeout)
def delete_container(STORAGE_NAME, STORAGE_KEY, CONTAINER_NAME):
    """Delete a blob container and print the result reported by the service.

    WARNING: run this only if you really want to delete the container.
    Download your data first -- the blobs in the container are lost.

    :param STORAGE_NAME: storage account name.
    :param STORAGE_KEY: storage account key.
    :param CONTAINER_NAME: container to delete.
    """
    service = BlockBlobService(account_name=STORAGE_NAME, account_key=STORAGE_KEY)
    deleted = service.delete_container(CONTAINER_NAME)
    print("{} delition status success: {}".format(CONTAINER_NAME, deleted))
def loaddata():
    """Download 'startups.csv' from blob storage and split it into features and target.

    :return: ``(X, y)`` where ``X`` holds every column but the last and
        ``y`` holds the column at index 4, both as ``.values`` arrays.
    """
    # Placeholders: fill in real account details before running.
    ACCOUNT_NAME = "<account name>"
    ACCOUNT_KEY = "<acccount key>"
    CONTAINER_NAME = "<container name>"

    csv_name = 'startups.csv'
    service = BlockBlobService(account_name=ACCOUNT_NAME, account_key=ACCOUNT_KEY)
    service.get_blob_to_path(CONTAINER_NAME, csv_name, csv_name)

    frame = pd.read_csv(csv_name)
    features = frame.iloc[:, :-1].values
    target = frame.iloc[:, 4].values
    return features, target
def spider_closed(self, spider):
    """Finish the export, rename the output file with a UTC timestamp and optionally upload it.

    The new name is the old one minus its last five characters, followed by
    '-<timestamp>.tsv'.  The upload happens only when
    UPLOAD_TO_AZURE_STORAGE is set.
    """
    self.exporter.finish_exporting()

    export_file = self.files.pop(spider)
    old_name = export_file.name
    stamp = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S")
    new_name = old_name[:-5] + '-' + stamp + '.tsv'
    export_file.close()
    os.rename(old_name, new_name)

    if not UPLOAD_TO_AZURE_STORAGE:
        return

    service = BlockBlobService(account_name=AZURE_ACCOUNT_NAME, account_key=AZURE_ACCOUNT_KEY)
    tsv_settings = ContentSettings(content_type='text/tab-separated-values')
    # The renamed file is both the blob name and the local path.
    service.create_blob_from_path(
        AZURE_CONTAINER,
        new_name,
        new_name,
        content_settings=tsv_settings,
    )
def create_blob_sas_defintion(self, storage_account_name, vault_url):
    """
    Create a service SAS definition that grants access to a blob container.

    A template SAS token for the container 'blobcontainer' is generated and
    stored in the vault as the SAS definition 'blobcontall'.  A token
    provisioned from that definition is then used to create a blob, list
    the container and delete every blob in it.

    :param storage_account_name: name of the storage account.
    :param vault_url: base URL of the key vault.
    """
    from azure.storage.blob import BlockBlobService, ContainerPermissions
    from azure.keyvault.models import SasTokenType, SasDefinitionAttributes
    from azure.keyvault import SecretId

    container = 'blobcontainer'

    # The template URI of a service SAS definition is the storage entity URL
    # carrying a template token.  This sample builds it for a blob container;
    # a similar approach works for the other storage services (File, Queue,
    # Table).

    # The template is not signed with the real account key; the placeholder
    # key 00000000 is used instead.
    template_service = BlockBlobService(
        account_name=storage_account_name,
        account_key='00000000',
    )
    full_access = ContainerPermissions(read=True, write=True, delete=True, list=True)
    template_token = template_service.generate_container_shared_access_signature(
        container_name=container,
        permission=full_access,
        expiry='2020-01-01',
    )

    # The same service object builds the template URI for the definition.
    template_uri = template_service.make_container_url(
        container_name=container,
        protocol='https',
        sas_token=template_token,
    )

    # Register the SAS definition in the vault.
    definition_attributes = SasDefinitionAttributes(enabled=True)
    sas_definition = self.client.set_sas_definition(
        vault_base_url=vault_url,
        storage_account_name=storage_account_name,
        sas_definition_name='blobcontall',
        template_uri=template_uri,
        sas_type=SasTokenType.service,
        validity_period='PT2H',
        sas_definition_attributes=definition_attributes,
    )

    # The definition's secret id locates the provisioned token, which is read
    # from the vault as a secret.
    secret_id = SecretId(uri=sas_definition.secret_id)
    provisioned_token = self.client.get_secret(
        vault_base_url=secret_id.vault,
        secret_name=secret_id.name,
        secret_version=secret_id.version,
    ).value

    # Use the provisioned token to work with blobs in the container.
    sas_service = BlockBlobService(
        account_name=storage_account_name,
        sas_token=provisioned_token,
    )
    sas_service.create_blob_from_text(
        container_name=container,
        blob_name='blob2',
        text=u'test blob2 data',
    )
    # Materialise the listing before deleting from the container.
    for blob in list(sas_service.list_blobs(container_name=container)):
        sas_service.delete_blob(container_name=container, blob_name=blob.name)
def upload_assets(self, blob_client: azureblob.BlockBlobService):
    """
    Create the job's input and output containers, record their SAS URLs in
    ``self.storage_info`` and upload the scene file named by the parameters
    file from the local "Assets" directory into the input container.

    :param blob_client: A blob service client used for making blob operations.
    :type blob_client: `azure.storage.blob.BlockBlobService`
    """
    input_container_name = "fgrp-" + self.job_id
    output_container_name = "fgrp-" + self.job_id + '-output'

    # Create the input container, then the output container.
    for container in (input_container_name, output_container_name):
        blob_client.create_container(container, fail_on_exist=False)
        logger.info('creating a storage container: {}'.format(container))

    # The input URL grants read + list; the output URL adds write.
    account = blob_client.account_name
    input_sas = utils.get_container_sas_token(
        blob_client,
        input_container_name,
        ContainerPermissions.READ + ContainerPermissions.LIST)
    full_sas_url_input = 'https://{}.blob.core.windows.net/{}?{}'.format(
        account, input_container_name, input_sas)

    output_sas = utils.get_container_sas_token(
        blob_client,
        output_container_name,
        ContainerPermissions.READ + ContainerPermissions.LIST + ContainerPermissions.WRITE)
    full_sas_url_output = 'https://{}.blob.core.windows.net/{}?{}'.format(
        account, output_container_name, output_sas)

    # Set the storage info for the container.
    self.storage_info = utils.StorageInfo(
        input_container_name,
        output_container_name,
        full_sas_url_input,
        full_sas_url_output)

    # Upload the asset whose file name matches the scene file.
    scenefile = ctm.get_scene_file(self.parameters_file)
    for asset_name in os.listdir("Assets"):
        if scenefile != asset_name:
            continue
        asset_path = Path("Assets/" + asset_name)
        utils.upload_file_to_container(blob_client, input_container_name, asset_path)
def test_response_callback(self):
    """A response callback that rewrites the status makes ``exists`` report True."""
    # Arrange
    service = BlockBlobService(self.account_name, self.account_key)
    container_name = self.get_resource_name('cont')

    # Act
    def force_success(response):
        # Reset the status and drop the headers so the exists call succeeds.
        response.status = 200
        response.headers.clear()

    service.response_callback = force_success

    # Assert
    self.assertTrue(service.exists(container_name))
def _client(self):
    """Return the cached blob client, creating it on first use.

    On creation ``create_container`` is called for ``self._container_name``
    with ``fail_on_exist=False``.
    """
    if not self.__client:
        # Cache the client before touching the container, as the original did.
        self.__client = BlockBlobService(connection_string=self._connection_string)
        self.__client.create_container(self._container_name, fail_on_exist=False)
    return self.__client
def container_client(self):
    """Return a new BlockBlobService built from ``self.storage_account`` and ``self.storage_key``."""
    service = BlockBlobService(self.storage_account, self.storage_key)
    return service
from ckanapi.errors import CKANAPIError
from datetime import datetime
from dateutil import parser as dateparser
from tempfile import mkdtemp
# noinspection PyPackageRequirements
from azure.storage.blob.models import ResourceProperties

# Read configuration information and initialize.
# All settings come from 'azure.ini': the 'working' section holds local
# directories and the log file name, the 'azure-blob-storage' section holds
# the account credentials and container names.
Config = ConfigParser.ConfigParser()
Config.read('azure.ini')
ckanjson_dir = Config.get('working', 'ckanjson_directory')
block_blob_service = BlockBlobService(
    Config.get('azure-blob-storage', 'account_name'),
    Config.get('azure-blob-storage', 'account_key'))
ckan_container = Config.get('azure-blob-storage', 'account_obd_container')
gcdocs_container = Config.get('azure-blob-storage', 'account_gcdocs_container')
doc_intake_dir = Config.get('working', 'intake_directory')

# Setup logging: a console handler and a file handler whose name is the
# configured 'error_logfile' value expanded with strftime for the current time.
logger = logging.getLogger('base')
logger.setLevel(logging.DEBUG)
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
fh = logging.FileHandler(datetime.now().strftime(
    Config.get('working', 'error_logfile')))
# NOTE(review): this sets the console handler's level a second time
# (DEBUG -> INFO), while the file handler `fh` created just above gets no
# explicit level here. Possibly `fh.setLevel(...)` was intended -- confirm.
ch.setLevel(logging.INFO)
# How this works:
sto_accountKey = configData['sto_accountKey'] log_name = configData['logName'] log_level = configData['logLevel'] purge_log = configData['purgeLog'] # Get a fresh API access token... response = amspy.get_access_token(account_name, account_key) resjson = response.json() access_token = resjson["access_token"] # Get Asset by using the list_media_asset method and the Asset ID response = amspy.list_media_asset(access_token, OUTPUTASSETID) if (response.status_code == 200): resjson = response.json() # Get the container name from the Uri outputAssetContainer = resjson['d']['Uri'].split('/')[3] print(outputAssetContainer) ### Use the Azure Blob Blob Service library from the Azure Storage SDK. block_blob_service = BlockBlobService(account_name=sto_account_name, account_key=sto_accountKey) generator = block_blob_service.list_blobs(outputAssetContainer) for blob in generator: print(blob.name) if (blob.name.endswith(".vtt")): blobText = block_blob_service.get_blob_to_text(outputAssetContainer, blob.name) print("\n\n##### WEB VTT ######") print(blobText.content) block_blob_service.get_blob_to_path(outputAssetContainer, blob.name, "output/" + blob.name)
def upload_recording(filename: str, config):
    """Upload one recording to blob storage, publishing a status file before and after.

    The blob name is derived from the file's base name, which must look
    like ``<x>_<day>_<time>.<extension>``: dashes in the day and time parts
    become underscores and the blob is stored as
    ``<day>/<time>/recording.<extension>``.  Any error during the upload is
    logged and ends the function early, with the indicator LED switched off.

    :param filename: path of the recording to upload.
    :param config: parsed configuration; ``('Cloud', 'container')`` names
        the target container.
    """

    def publish_status(in_progress):
        # Publish a status file; failures are logged and otherwise ignored.
        # `in_progress` is the second update_status flag: True before the
        # upload, False after (presumably "uploading" -- confirm).
        try:
            logger.write('Uploading status file...')
            status_file = status.update_status(False, in_progress, False)
            status.upload_status(status_file, False)
            logger.write('Upload complete.')
        except Exception as e:
            logger.write('An error occurred while uploading a status file.')
            logger.write(str(e))

    # Upload indicator LED on pin 16 (the original comment says GPIO 20 --
    # confirm the pin numbering).
    upload_light = led.led(16)
    upload_light.on()

    publish_status(True)

    try:
        started = time.time()
        logger.write('Uploading...')

        credentials = configparser.ConfigParser()
        credentials.read('credentials.ini')
        container = config.get('Cloud', 'container')
        username = credentials.get('Azure', 'Username')
        password = credentials.get('Azure', 'Password')

        service = BlockBlobService(account_name=username, account_key=password)
        # Force chunked uploading: a 16-byte single-put limit and 8 KiB blocks.
        service.MAX_SINGLE_PUT_SIZE = 16
        service.MAX_BLOCK_SIZE = 8 * 1024

        name_parts = os.path.basename(filename).split('.')
        timestamp = name_parts[0]
        extension = name_parts[1]

        stamp_fields = timestamp.split('_')
        day_part = stamp_fields[1].replace('-', '_')
        time_part = stamp_fields[2].replace('-', '_')
        blob_name = day_part + '/' + time_part + '/recording.' + extension

        service.create_blob_from_path(container, blob_name, filename)

        elapsed = time.time() - started
        logger.write('Upload Succeeded: ' + blob_name)
        logger.write('Upload took ' + str(elapsed) + ' seconds.\n')

        if REMOVE_RECORDINGS:
            os.remove(filename)
    except Exception as e:
        logger.write('CheckConfig: There was an error uploading to the cloud.')
        logger.write(str(e))
        upload_light.off()
        return

    upload_light.off()
    logger.write('Upload complete')

    publish_status(False)
class AMLMonitor:
    """Tracks the AML jobs submitted for one request and aggregates their outputs."""

    def __init__(self, request_id, list_jobs_submitted, request_name,
                 request_submission_timestamp, model_version):
        """Store the request details and connect to the internal storage account.

        The storage account name and key are read from the environment
        variables STORAGE_ACCOUNT_NAME and STORAGE_ACCOUNT_KEY.
        """
        self.request_id = request_id
        self.jobs_submitted = list_jobs_submitted
        self.request_name = request_name  # None if not provided by the user
        self.request_submission_timestamp = request_submission_timestamp  # str
        self.model_version = model_version  # str

        account_name = os.getenv('STORAGE_ACCOUNT_NAME')
        account_key = os.getenv('STORAGE_ACCOUNT_KEY')

        self.internal_storage_service = BlockBlobService(
            account_name=account_name, account_key=account_key)
        self.internal_datastore = {
            'account_name': account_name,
            'account_key': account_key,
            'container_name': api_config.INTERNAL_CONTAINER,
        }
        self.aml_output_container = api_config.AML_CONTAINER
        self.internal_container = api_config.INTERNAL_CONTAINER

    def get_total_jobs(self):
        """Return the number of submitted jobs."""
        return len(self.jobs_submitted)

    def check_job_status(self):
        """Poll every submitted job once.

        :return: ``(all_jobs_finished, status_tally)``; the tally maps each
            status string to the number of jobs reporting it.
        """
        print('AMLMonitor, check_job_status() called.')
        tally = defaultdict(int)
        everything_done = True

        for job_id, job in self.jobs_submitted.items():
            # Common values include Running, Completed and Failed; as of
            # March 19 the enumeration apparently also has Finished.
            status = job['pipeline_run'].get_status()
            print('request_id {}, job_id {}, status is {}'.format(
                self.request_id, job_id, status))
            tally[status] += 1
            # A single job outside the completed states clears the flag.
            if status not in api_config.AML_CONFIG['completed_status']:
                everything_done = False

        return everything_done, tally

    def _download_read_json(self, blob_path):
        """Download a blob from the AML output container and parse it as JSON."""
        text = self.internal_storage_service.get_blob_to_text(
            self.aml_output_container, blob_path).content
        return json.load(io.StringIO(text))

    def _generate_urls_for_outputs(self):
        """Return read-only SAS URLs for the three output files of this request.

        :return: dict with the keys 'detections', 'failed_images' and 'images'.
        :raises RuntimeError: if anything goes wrong while building the URLs.
        """
        try:
            request_id = self.request_id
            name = self.request_name
            timestamp = self.request_submission_timestamp

            blob_paths = {}
            blob_paths['detections'] = '{}/{}_detections_{}_{}.json'.format(
                request_id, request_id, name, timestamp)
            blob_paths['failed_images'] = '{}/{}_failed_images_{}_{}.json'.format(
                request_id, request_id, name, timestamp)
            # The list of images has no request name or timestamp in its file
            # name so score.py can locate it easily.
            blob_paths['images'] = '{}/{}_images.json'.format(request_id, request_id)

            expiry = datetime.utcnow() + timedelta(days=api_config.EXPIRATION_DAYS)
            service = self.internal_storage_service

            def signed_url(blob_path):
                # Read-only SAS for one blob, embedded into the blob's URL.
                sas = service.generate_blob_shared_access_signature(
                    self.internal_container,
                    blob_path,
                    permission=BlobPermissions.READ,
                    expiry=expiry)
                return service.make_blob_url(
                    self.internal_container, blob_path, sas_token=sas)

            return {output: signed_url(path) for output, path in blob_paths.items()}
        except Exception as e:
            message = ('An error occurred while generating URLs for the output files. '
                       + 'Please contact us to retrieve your results. '
                       + 'Error: {}'.format(str(e)))
            raise RuntimeError(message)

    def aggregate_results(self):
        """Merge the per-job outputs of this request and upload the combined files.

        :return: the URLs produced by ``_generate_urls_for_outputs``.
        """
        print('AMLMonitor, aggregate_results() called')

        # Knowing the run_id (the folder the result is written to) would be
        # more efficient, but it can't be obtained reliably after submitting
        # the run.  So every blob in the output container is listed (up to a
        # large limit, to avoid dealing with the generator's next_marker) and
        # matched by request_id.
        aml_datastore = copy.deepcopy(self.internal_datastore)
        aml_datastore['container_name'] = self.aml_output_container
        list_blobs = SasBlob.list_blobs_in_container(
            api_config.MAX_BLOBS_IN_OUTPUT_CONTAINER,
            datastore=aml_datastore,
            blob_suffix='.json')

        # "request" is part of the AML job_id.
        detections_prefix = 'detections_request{}_'.format(self.request_id)
        failures_prefix = 'failures_request{}_'.format(self.request_id)

        all_detections = []
        failures = []
        num_aggregated = 0
        for blob_path in list_blobs:
            if not blob_path.endswith('.json'):
                continue
            # blob_path is azureml/run_id/output_requestID/out_file_name.json
            out_file_name = blob_path.split('/')[-1]
            if out_file_name.startswith(detections_prefix):
                all_detections.extend(self._download_read_json(blob_path))
                num_aggregated += 1
                print('Number of results aggregated: ', num_aggregated)
            elif out_file_name.startswith(failures_prefix):
                failures.extend(self._download_read_json(blob_path))

        print('aggregate_results(), length of all_detections: {}'.format(
            len(all_detections)))

        # Keys are inserted in the order they should appear in the JSON output.
        detection_output_content = OrderedDict([
            ('info', {
                'detector': 'megadetector_v{}'.format(self.model_version),
                'detection_completion_time': get_utc_time(),
                'format_version': api_config.OUTPUT_FORMAT_VERSION,
            }),
            ('detection_categories', api_config.DETECTION_CATEGORIES),
            ('images', all_detections),
        ])
        detection_output_str = json.dumps(detection_output_content, indent=1)

        # Upload the aggregated detections to the internal container.
        detections_blob = '{}/{}_detections_{}_{}.json'.format(
            self.request_id, self.request_id, self.request_name,
            self.request_submission_timestamp)
        self.internal_storage_service.create_blob_from_text(
            self.internal_container,
            detections_blob,
            detection_output_str,
            max_connections=4)
        print('aggregate_results(), detections uploaded')

        print('aggregate_results(), number of failed images: {}'.format(
            len(failures)))
        failures_str = json.dumps(failures, indent=1)
        failures_blob = '{}/{}_failed_images_{}_{}.json'.format(
            self.request_id, self.request_id, self.request_name,
            self.request_submission_timestamp)
        self.internal_storage_service.create_blob_from_text(
            self.internal_container, failures_blob, failures_str)
        print('aggregate_results(), failures uploaded')

        return self._generate_urls_for_outputs()
import string def find_person_in_string(s): if "Nicholas" in s: return "Nicholas" if "Ben" in s: return "Ben" if "Rob" in s: return "Rob" # initialise blob service block_blob_service = BlockBlobService( account_name='nikolas', account_key= 'b/qWJCuFxdUD4A9Y6erFvXwqMcUBNJz+MAHHADXWN4v+8JRMxMfIW+nqeGKfUFhP1xcb5GJzA2OSuVEs3rVr0Q==' ) block_blob_service.create_blob_from_path( 'addresses', 'zoom_0.mp4', 'zoom_0.mp4', content_settings=ContentSettings(content_type='video/mp4')) #get url # block_blob_service.set_container_acl("addresses",{"AccessPolicy": "abc"}) sas_token = block_blob_service.generate_blob_shared_access_signature( "addresses", "zoom_0.mp4", permission=BlobPermissions().READ,
class DataSet:
    """Local working set of joined events and models for a date range.

    Settings are read from the ``DecisionService`` section of ``ds.config``
    in the current working directory; blobs are accessed through a
    ``BlockBlobService`` built from the account name/key found there.
    """

    @classmethod
    def fromstrings(cls, start_date_string, end_date_string):
        """Alternate constructor taking two ``YYYYMMDD`` strings."""
        start_date = date(int(start_date_string[0:4]),
                          int(start_date_string[4:6]),
                          int(start_date_string[6:8]))
        end_date = date(int(end_date_string[0:4]),
                        int(end_date_string[4:6]),
                        int(end_date_string[6:8]))
        return cls(start_date, end_date)

    def __init__(self, start_date, end_date):
        """Read ``ds.config``, create the blob client and the scoring dir.

        :param start_date: first ``date`` of the range (inclusive).
        :param end_date: last ``date`` of the range.
        :raises KeyError: if ``ds.config`` lacks the expected section/keys.
        """
        self.start_date = start_date
        self.end_date = end_date

        self.config = configparser.ConfigParser()
        self.config.read('ds.config')
        self.ds = self.config['DecisionService']
        self.cache_folder = self.ds['CacheFolder']
        self.joined_examples_container = self.ds['JoinedExamplesContainer']
        self.experimental_unit_duration_days = self.ds[
            'ExperimentalUnitDurationDays']

        # https://azure-storage.readthedocs.io/en/latest/_modules/azure/storage/blob/models.html#BlobBlock
        self.block_blob_service = BlockBlobService(
            account_name=self.ds['AzureBlobStorageAccountName'],
            account_key=self.ds['AzureBlobStorageAccountKey'])

        # Lookback 'experimental_unit_duration_days' for events
        self.start_date_withlookback = start_date + timedelta(
            days=-int(self.experimental_unit_duration_days))

        self.ordered_joined_events_filename = os.path.join(
            self.cache_folder,
            'data_{0}-{1}.json'.format(start_date.strftime('%Y%m%d'),
                                       end_date.strftime('%Y%m%d')))

        # create scoring directories for [start_date, end_date] range
        self.scoring_dir = os.path.join(self.cache_folder, 'scoring')
        if not os.path.exists(self.scoring_dir):
            os.makedirs(self.scoring_dir)

    def download_events(self):
        """List non-empty joined-example blobs, then load and index them.

        Populates ``self.joined``, ``self.data`` and ``self.global_idx``
        (event id -> reader of the data file containing it).
        """
        temp = []
        for current_date in dates_in_range(self.start_date_withlookback,
                                           self.end_date):
            blob_prefix = current_date.strftime('%Y/%m/%d/')
            temp += filter(
                lambda b: b.properties.content_length != 0,
                self.block_blob_service.list_blobs(
                    self.joined_examples_container, prefix=blob_prefix))

        self.joined = list(map(parse_name, temp))

        self.global_idx = {}
        self.global_model_idx = {}
        self.data = []

        def load_data(ts, blob):
            jd = JoinedData(self.block_blob_service, self.cache_folder,
                            self.joined_examples_container, ts, blob)
            jd.index()
            return jd

        print("Downloading & indexing events...")
        # NOTE(review): mapping a lambda only works with a thread-based Pool
        # (lambdas are not picklable) - confirm which Pool this file imports.
        with Pool(processes=8) as p:
            self.data = p.map(lambda x: load_data(x[0], x[1]), self.joined)

        for jd in self.data:
            reader = jd.reader()
            for evt in jd.ids:
                self.global_idx[evt.evt_id] = reader

    def build_model_history(self):
        """Sort data files by time and load the checkpointed model history.

        Fills ``self.model_history`` (sorted by ``ts``) and
        ``self.global_model_idx`` (model id -> model).
        """
        print('Found {0} events. Sorting data files by time...'.format(
            len(self.global_idx)))
        self.data.sort(key=lambda jd: jd.ts)

        # reproduce training, by using trackback files
        self.model_history = list(
            get_checkpoint_models(self.block_blob_service,
                                  self.start_date_withlookback,
                                  self.end_date))
        with Pool(5) as p:
            self.model_history = p.map(
                lambda x: CheckpointedModel(self.block_blob_service, x[0],
                                            self.cache_folder, x[1], x[2]),
                self.model_history)

        for m in self.model_history:
            if m.model_id is not None:
                self.global_model_idx[m.model_id] = m

        self.model_history.sort(key=lambda jd: jd.ts)

    def get_online_settings(self):
        """Return the parsed JSON of the ``mwt-settings/client`` blob."""
        online_settings_blob = CachedBlob(self.block_blob_service,
                                          self.cache_folder, 'mwt-settings',
                                          'client')
        # Context manager so the handle is closed even if parsing fails.
        with open(online_settings_blob.filename, 'r',
                  encoding='utf8') as settings_file:
            return json.load(settings_file)

    def create_files(self):
        """Write the time-ordered joined-events file used for training.

        Re-creates one scoring directory per day in the range, then walks the
        model history and, for each model at or after ``start_date``, appends
        every trackback event found in ``self.global_idx`` followed by a
        ``save_<model file>`` tag line.
        """
        for local_date in dates_in_range(self.start_date, self.end_date):
            scoring_dir_date = os.path.join(self.scoring_dir,
                                            local_date.strftime('%Y/%m/%d'))
            if os.path.exists(scoring_dir_date):
                rmtree(scoring_dir_date)
            os.makedirs(scoring_dir_date)

        num_events_counter = 0
        missing_events_counter = 0

        with open(self.ordered_joined_events_filename, 'w',
                  encoding='utf8') as ordered_joined_events:
            model_history_withindaterange = filter(
                lambda x: x.ts.date() >= self.start_date, self.model_history)
            print('Creating {0} scoring models...'.format(
                len(list(model_history_withindaterange))))

            for m in self.model_history:
                # for scoring and ips calculations, we only consider models
                # within [start_date, end_date]
                if m.ts.date() < self.start_date:
                    continue

                print('Creating scoring models {0}...'.format(
                    m.ts.strftime('%Y/%m/%d %H:%M:%S')))
                num_valid_events = 0

                for event_id in m.trackback_ids:
                    if event_id not in self.global_idx:
                        missing_events_counter += 1
                        continue

                    line = self.global_idx[event_id].read(event_id)
                    if not line:
                        continue

                    line = line.strip() + '\n'
                    ordered_joined_events.write(line)
                    num_events_counter += 1
                    num_valid_events += 1

                    if m.model_id is None:
                        # no modelid available, skipping scoring event creation
                        continue

                    scoring_model_id = json.loads(line)['_model_id']
                    if scoring_model_id is None:
                        continue  # this can happen at the very beginning if no model was available
                    if scoring_model_id not in self.global_model_idx:
                        continue  # this can happen if the event was scored using a model that lies outside our model history

                    scoring_model = self.global_model_idx[scoring_model_id]
                    if scoring_model.ts.date() >= self.start_date:
                        # Events scored by a model generated prior to
                        # start_date are excluded from scoring.
                        scoring_filename = os.path.join(
                            self.scoring_dir,
                            scoring_model.ts.strftime('%Y'),
                            scoring_model.ts.strftime('%m'),
                            scoring_model.ts.strftime('%d'),
                            scoring_model_id + '.json')
                        # NOTE: writing per-model scoring files is currently
                        # disabled:
                        # with open(scoring_filename, 'a', encoding='utf8') as scoring_file:
                        #     _ = scoring_file.write(line)

                # A model without an id has no file name to save under, so the
                # tag is skipped instead of failing on ``None + '.model'``.
                if num_valid_events > 0 and m.model_id is not None:
                    scoring_model_filename = os.path.join(
                        self.scoring_dir, m.ts.strftime('%Y'),
                        m.ts.strftime('%m'), m.ts.strftime('%d'),
                        m.model_id + '.model')
                    ordered_joined_events.write(
                        json.dumps({
                            '_tag': 'save_{0}'.format(scoring_model_filename)
                        }) + '\n')

        print('Wrote {0} events ({1} trackback ids not found)'.format(
            num_events_counter, missing_events_counter))

    def train_models(self):
        """Warm-start ``vw`` from the latest pre-start model and train.

        :raises ValueError: if the model history has no model before
            ``start_date`` to warm-start from.
        """
        model_history_prestart = list(
            filter(lambda x: x.ts.date() < self.start_date,
                   self.model_history))
        if not model_history_prestart:
            raise ValueError(
                'No model found before start date {0}; cannot warm start'
                .format(self.start_date))
        model_init = max(model_history_prestart, key=lambda x: x.ts)
        model_init_name = model_init.trackback.filename.rsplit(
            '.trackback', 1)[0]
        print("Warm start model: '{0}'".format(model_init_name))

        # Download model_init (and make sure it works on windows)
        model_init_info = re.split('[/\\\\]+', model_init_name)[-4:]
        container = model_init_info[0]
        name = '/'.join(model_init_info[1:4])
        CachedBlob(self.block_blob_service, self.cache_folder, container,
                   name)

        online_args = self.get_online_settings()['TrainArguments']
        # NOTE(review): the command is passed to the shell as one string;
        # paths containing spaces or shell metacharacters will break it.
        vw_cmdline = ('vw ' + self.ordered_joined_events_filename +
                      ' --json --save_resume --preserve_performance_counters -i '
                      + model_init_name + ' ' + online_args)
        # vw_cmdline += ' --quiet'
        print(vw_cmdline)
        os.system(vw_cmdline)
def get_file_list_from_container(container, account_name, account_key):
    """Print the name and last-modified value of every blob in `container`.

    A ``BlockBlobService`` is built from `account_name` / `account_key` and
    its ``list_blobs`` result is iterated; nothing is returned.
    """
    service = BlockBlobService(account_name=account_name,
                               account_key=account_key)
    for entry in service.list_blobs(container):
        blob_name = entry.name
        modified_at = entry.properties.last_modified
        print(blob_name, modified_at)
def get_conn(self) -> BlockBlobService:
    """Build a BlockBlobService from the connection named by ``self.conn_id``.

    The connection's ``login`` / ``password`` are used as the account name /
    key, and every entry of its ``extra_dejson`` is forwarded as an extra
    keyword argument.
    """
    connection = self.get_connection(self.conn_id)
    extra_kwargs = connection.extra_dejson
    account, key = connection.login, connection.password
    return BlockBlobService(account_name=account, account_key=key,
                            **extra_kwargs)
import os
import boto3
import pymysql
import time
import MySQLdb
from azure.storage.blob import BlockBlobService
from azure.storage.blob import ContentSettings
from flask import Flask, request, send_from_directory, render_template

app = Flask(__name__)

# Blob client shared by the module. The literals below are placeholders and
# must be replaced with real values (preferably loaded from configuration).
block_blob_service = BlockBlobService(account_name='accoutName',
                                      account_key='accountKey')
blobStore = "blobStoreageURL"
localStore = "localPathToDefaultStorageOfFilesAndImages"

# Placeholder database settings.
hostname = "azureHostName"
username = "******"
password = "******"
database = "azureDatabaseName"

# NOTE: this opens a MySQL connection as a side effect of importing the
# module.
mySQLCon = MySQLdb.connect(host="mysqlHostName",
                           user="******",
                           passwd="mysqlPassword",
                           db="mysqlDBName")


def doQuery(mySQLCon, cityName, fare1, fare2):
    """Open a cursor on `mySQLCon` and execute the (placeholder) query.

    NOTE(review): `cityName`, `fare1` and `fare2` are not used by the visible
    code and nothing is fetched or returned - presumably the real query is
    meant to be parameterized with them; confirm.
    """
    cur = mySQLCon.cursor()
    cur.execute("Query to fetch description of file/image")
#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Wed Aug 8 2018 @author: "Anirban Das" """ import os import getpass import datetime , sys, time, csv from azure.storage.blob import BlockBlobService, PublicAccess block_blob_service = BlockBlobService(account_name='<Storage Account Name>', account_key='<Key 1 of the Storage Account>') container_name ='<Container to Upload Files>' # Create the BlockBlockService block_blob_service.create_container(container_name) STATS_DIRECTORY = "." # write local stats in a csv file def write_local_stats(filename, stats_list): global STATS_DIRECTORY try: filepath = STATS_DIRECTORY.rstrip(os.sep) + os.sep + filename with open(filepath, 'w') as file: writer = csv.writer(file, delimiter=',') writer.writerows(stats_list) except :
class AzureBlobWriter(BaseWriter):
    """
    Writes items to azure blob containers.

        - account_name (str)
            Public access name of the azure account.

        - account_key (str)
            Public access key to the azure account.

        - container (str)
            Blob container name.
    """
    supported_options = {
        'account_name': {
            'type': six.string_types,
            'env_fallback': 'EXPORTERS_AZUREWRITER_NAME'
        },
        'account_key': {
            'type': six.string_types,
            'env_fallback': 'EXPORTERS_AZUREWRITER_KEY'
        },
        'container': {
            'type': six.string_types
        }
    }
    hash_algorithm = 'md5'
    VALID_CONTAINER_NAME_RE = r'[a-zA-Z0-9-]{3,63}'

    def __init__(self, *args, **kw):
        """Read options, warn on a suspicious container name, create it."""
        from azure.storage.blob import BlockBlobService
        super(AzureBlobWriter, self).__init__(*args, **kw)
        account_name = self.read_option('account_name')
        account_key = self.read_option('account_key')
        self.container = self.read_option('container')

        # Anchor the pattern at the end: a bare re.match() only checks a
        # prefix, so over-long names or names with invalid trailing
        # characters used to pass silently.
        name_pattern = r'(?:%s)\Z' % self.VALID_CONTAINER_NAME_RE
        if '--' in self.container or not re.match(name_pattern,
                                                  self.container):
            help_url = ('https://azure.microsoft.com/en-us/documentation'
                        '/articles/storage-python-how-to-use-blob-storage/')
            warnings.warn(
                "Container name %s doesn't conform with naming rules (see: %s)"
                % (self.container, help_url))

        self.azure_service = BlockBlobService(account_name, account_key)
        self.azure_service.create_container(self.container)
        self.logger.info('AzureBlobWriter has been initiated. '
                         'Writing to container {}'.format(self.container))
        self.set_metadata('files_counter', 0)
        self.set_metadata('blobs_written', [])

    def write(self, dump_path, group_key=None):
        """Upload the file at `dump_path` and bump the files counter.

        `group_key` is accepted but not used here.
        """
        self.logger.info('Start uploading {} to {}'.format(
            dump_path, self.container))
        self._write_blob(dump_path)
        self.set_metadata('files_counter',
                          self.get_metadata('files_counter') + 1)

    @retry_long
    def _write_blob(self, dump_path):
        """Upload `dump_path` as a blob named after its last path segment."""
        blob_name = dump_path.split('/')[-1]
        self.azure_service.create_blob_from_path(
            self.read_option('container'),
            blob_name,
            dump_path,
            max_connections=5,
        )
        self.logger.info('Saved {}'.format(blob_name))
        self._update_metadata(dump_path, blob_name)

    def _update_metadata(self, dump_path, blob_name):
        """Record size, hash and record count of an uploaded blob."""
        buffer_info = self.write_buffer.metadata[dump_path]
        # b64encode() returns bytes on Python 3; decode so the value compares
        # equal (via str()) to the textual content_md5 read back later,
        # instead of rendering as "b'...'" and never matching.
        encoded_hash = b64encode(
            unhexlify(buffer_info['file_hash'])).decode('ascii')
        file_info = {
            'blob_name': blob_name,
            'size': buffer_info['size'],
            'hash': encoded_hash,
            'number_of_records': buffer_info['number_of_records']
        }
        self.get_metadata('blobs_written').append(file_info)

    def _check_write_consistency(self):
        """Compare every recorded blob against what the service reports.

        :raises InconsistentWriteState: if a blob is missing, or its size or
            MD5 differs from the recorded value.
        """
        from azure.common import AzureMissingResourceHttpError
        for blob_info in self.get_metadata('blobs_written'):
            try:
                blob = self.azure_service.get_blob_properties(
                    self.read_option('container'), blob_info['blob_name'])
            except AzureMissingResourceHttpError:
                raise InconsistentWriteState('Missing blob {}'.format(
                    blob_info['blob_name']))

            blob_size = blob.properties.content_length
            blob_md5 = blob.properties.content_settings.content_md5
            if str(blob_size) != str(blob_info['size']):
                raise InconsistentWriteState(
                    'File {} has unexpected size. (expected {} - got {})'.
                    format(blob_info['blob_name'], blob_info['size'],
                           blob_size))
            if str(blob_md5) != str(blob_info['hash']):
                raise InconsistentWriteState(
                    'File {} has unexpected hash. (expected {} - got {})'.
                    format(blob_info['blob_name'], blob_info['hash'],
                           blob_md5))
        self.logger.info('Consistency check passed')
# from predict import initialize, predict_image from azure.storage.queue import QueueService, QueueMessageFormat import base64 # from video2image import sampling from concurrent import futures from concurrent.futures import ThreadPoolExecutor print("Azure Blob storage v12 - Python quickstart sample") API_ENDPOINT = "http://127.0.0.1:5000/image" account_name = "cowimagestorage" account_key = "" block_blob_service = BlockBlobService( account_name=account_name, account_key=account_key ) <<<<<<< HEAD queue = QueueService(connection_string="") queue.encode_function = QueueMessageFormat.text_base64encode def process_single_file(filename, blob, dirname=""): # image = {'imageData': open('../test_image/{filename}'.format(filename=filename), 'rb')} print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>") # with open('{dirname}/{filename}'.format(filename=filename, dirname=dirname), 'rb') as image: ======= def process_single_file(filename):
from datetime import datetime
from FlaskWebProject import app, db, login
from werkzeug.security import generate_password_hash, check_password_hash
from flask_login import UserMixin
from azure.storage.blob import BlockBlobService
import string
import random
# ``secure_filename`` lives in ``werkzeug.utils``; the top-level
# ``from werkzeug import secure_filename`` alias was removed in Werkzeug 1.0.
from werkzeug.utils import secure_filename
from flask import flash

# Blob container name and client, both taken from the Flask app config.
blob_container = app.config['BLOB_CONTAINER']
blob_service = BlockBlobService(account_name=app.config['BLOB_ACCOUNT'],
                                account_key=app.config['BLOB_STORAGE_KEY'])


def id_generator(size=32, chars=string.ascii_uppercase + string.digits):
    """Return a random string of `size` characters drawn from `chars`.

    NOTE(review): uses ``random``, which is not suitable for secrets; fine
    for non-sensitive identifiers - confirm how callers use the result.
    """
    return ''.join(random.choice(chars) for _ in range(size))


class User(UserMixin, db.Model):
    """Application user stored in the ``users`` table."""
    __tablename__ = 'users'
    id = db.Column(db.Integer, primary_key=True)
    username = db.Column(db.String(64), index=True, unique=True)
    password_hash = db.Column(db.String(128))

    def __repr__(self):
        return '<User {}>'.format(self.username)

    def set_password(self, password):
        """Store the hash of `password` (never the plain text)."""
        self.password_hash = generate_password_hash(password)
import os, uuid, sys, configparser
from azure.storage.blob import BlockBlobService, PublicAccess

# Account name/key come from the DEFAULT section of config.ini in the
# current working directory.
config = configparser.ConfigParser()
config.read('config.ini')

# Create the BlockBlockService that is used to call the Blob service for the storage account
block_blob_service = BlockBlobService(
    account_name=config['DEFAULT']['storage_account_name'],
    account_key=config['DEFAULT']['storage_account_key'],
    endpoint_suffix="core.usgovcloudapi.net")

# Create a container called 'texts'.
container_name = 'texts'
block_blob_service.create_container(container_name)
print('created container')

# Set the permission so the blobs are public.
# NOTE: this makes every blob in the container publicly readable.
block_blob_service.set_container_acl(container_name,
                                     public_access=PublicAccess.Container)
print('permissions set')

path = '/Users/ephraimsalhanick/Desktop/AzureMLSI2018/Random_Speeches/txt'
for filename in os.listdir(path):
    full_path = os.path.join(path, filename)
    # os.listdir() also returns sub-directories, which cannot be uploaded
    # as a blob from a path; skip anything that is not a regular file.
    if not os.path.isfile(full_path):
        continue
    # Upload the file to storage
    block_blob_service.create_blob_from_path(container_name, filename,
                                             full_path)
    print('uploaded: ' + filename)
def run_sample():
    """Run the blob quickstart: create, upload, list, download, clean up.

    Creates the public container 'quickstartblobs', uploads a temp file from
    ~/Documents, lists the container, downloads the blob next to the
    original, waits for a key press and then deletes the container and both
    local files. Any exception is printed rather than raised.
    """
    try:
        # Create the BlockBlockService that is used to call the Blob service for the storage account
        # SECURITY NOTE(review): the account key is hard-coded in source; it
        # should be rotated and loaded from configuration/environment instead.
        block_blob_service = BlockBlobService(
            account_name='hackgt19',
            account_key='24wGa1RHd0BnemSDBbqRzvvTAB7Qy4IAN28E9de6OLR98wxnFljJXnKaBtzqJd2F53SmtNZP2NnZCPZkeL6wlQ==')

        # Create a container called 'quickstartblobs'.
        container_name = 'quickstartblobs'
        block_blob_service.create_container(container_name)

        # Set the permission so the blobs are public.
        block_blob_service.set_container_acl(
            container_name, public_access=PublicAccess.Container)

        # Create a file in Documents to test the upload and download.
        local_path = os.path.expanduser("~/Documents")
        local_file_name = "QuickStart_" + str(uuid.uuid4()) + ".txt"
        full_path_to_file = os.path.join(local_path, local_file_name)

        # Write text to the file; the context manager closes it even if the
        # write fails.
        with open(full_path_to_file, 'w') as sample_file:
            sample_file.write("Hello, World!")

        print("Temp file = " + full_path_to_file)
        print("\nUploading to Blob storage as blob " + local_file_name)

        # Upload the created file, use local_file_name for the blob name
        block_blob_service.create_blob_from_path(container_name,
                                                 local_file_name,
                                                 full_path_to_file)

        # List the blobs in the container
        print("\nList blobs in the container")
        generator = block_blob_service.list_blobs(container_name)
        for blob in generator:
            print("\t Blob name: " + blob.name)

        # Download the blob(s).
        # Add '_DOWNLOADED' as prefix to '.txt' so you can see both files in Documents.
        full_path_to_file2 = os.path.join(
            local_path, local_file_name.replace('.txt', '_DOWNLOADED.txt'))
        print("\nDownloading blob to " + full_path_to_file2)
        block_blob_service.get_blob_to_path(container_name, local_file_name,
                                            full_path_to_file2)

        sys.stdout.write("Sample finished running. When you hit <any key>, the sample will be deleted and the sample "
                         "application will exit.")
        sys.stdout.flush()
        input()

        # Clean up resources. This includes the container and the temp files
        block_blob_service.delete_container(container_name)
        os.remove(full_path_to_file)
        os.remove(full_path_to_file2)
    except Exception as e:
        print(e)
class AzureClient(BaseClient):
    """Azure client for snapshots, disks, attachments and blob transfers.

    Talks to blob storage through ``BlockBlobService`` and to the compute
    API through ``ComputeManagementClient``. Constants such as
    ``CONTAINER``, ``GUID``, ``FILE_MOUNTS`` and ``DEVICE_PATH_TEMPLATE``
    and helpers such as ``_wait`` and ``shell`` are not defined in this
    class and are presumably provided by ``BaseClient``.
    """

    def __init__(self, operation_name, configuration, directory_persistent,
                 directory_work_list, poll_delay_time, poll_maximum_time):
        """Load credentials, create the API clients and validate the setup.

        Credentials come from `configuration` itself when its
        ``credhub_url`` is ``None``, otherwise from CredHub.

        :raises Exception: if the container cannot be found/accessed, the
            SCSI host number cannot be determined, or the instance location
            cannot be retrieved (each is also reported via
            ``last_operation(..., 'failed')``).
        """
        super(AzureClient, self).__init__(operation_name, configuration,
                                          directory_persistent,
                                          directory_work_list,
                                          poll_delay_time, poll_maximum_time)
        if configuration['credhub_url'] is None:
            azure_config = configuration
        else:
            self.logger.info('fetching creds from credhub')
            azure_config = self._get_credentials_from_credhub(configuration)

        self.__setCredentials(azure_config['client_id'],
                              azure_config['client_secret'],
                              azure_config['tenant_id'])
        self.resource_group = azure_config['resource_group']
        self.storage_account_name = azure_config['storageAccount']
        self.storage_account_key = azure_config['storageAccessKey']
        self.subscription_id = azure_config['subscription_id']

        self.block_blob_service = BlockBlobService(
            account_name=self.storage_account_name,
            account_key=self.storage_account_key)
        self.compute_client = ComputeManagementClient(
            self.__azureCredentials, self.subscription_id)

        # +-> Check whether the given container exists and accessible
        if (not self.get_container()) or (not self.access_container()):
            msg = 'Could not find or access the given container.'
            self.last_operation(msg, 'failed')
            raise Exception(msg)

        # scsi_host_number would be used to determine lun to device mapping
        # scsi_host_number would be same for all data volumes/disks
        self.scsi_host_number = self.get_host_number_of_data_volumes()
        if not self.scsi_host_number:
            msg = 'Could not determine SCSI host number for data volume'
            self.last_operation(msg, 'failed')
            raise Exception(msg)

        self.instance_location = self.get_instance_location(
            configuration['instance_id'])
        if not self.instance_location:
            msg = 'Could not retrieve the location of the instance.'
            self.last_operation(msg, 'failed')
            raise Exception(msg)

        self.max_block_size = 100 * 1024 * 1024

        # list of regions where ZRS is supported
        self.zrs_supported_regions = [
            'westeurope', 'centralus', 'southeastasia', 'eastus2',
            'northeurope', 'francecentral'
        ]
        self.availability_zones = self._get_availability_zone_of_server(
            configuration['instance_id'])

    def __setCredentials(self, client_id, client_secret, tenant_id):
        """Create and store the service-principal credentials."""
        self.__azureCredentials = ServicePrincipalCredentials(
            client_id=client_id, secret=client_secret, tenant=tenant_id)

    def get_container(self):
        """Return the container properties, or ``None`` on any error."""
        try:
            container_props = self.block_blob_service.get_container_properties(
                self.CONTAINER)
            return container_props
        except Exception as error:
            self.logger.error(
                '[Azure] [STORAGE] ERROR: Unable to find container {}.\n{}'.
                format(self.CONTAINER, error))
            return None

    def access_container(self):
        """Return True if a test blob can be created and deleted."""
        # Test if the container is accessible
        try:
            key = '{}/{}'.format(self.GUID,
                                 'AccessTestByServiceFabrikPythonLibrary')
            self.block_blob_service.create_blob_from_text(
                self.CONTAINER, key, 'This is a sample text')
            self.block_blob_service.delete_blob(self.CONTAINER, key)
            return True
        except Exception as error:
            self.logger.error(
                '[Azure] [STORAGE] ERROR: Unable to access container {}.\n{}'.
                format(self.CONTAINER, error))
            return False

    def _get_availability_zone_of_server(self, instance_id):
        """Return the ``zones`` of the VM, or ``None`` on any error."""
        try:
            instance = self.compute_client.virtual_machines.get(
                self.resource_group, instance_id)
            return instance.zones
        except Exception as error:
            self.logger.error(
                '[Azure] ERROR: Unable to find or access attached volume for instance_id {}.{}'
                .format(instance_id, error))
            return None

    def get_snapshot(self, snapshot_name):
        """Return a ``Snapshot`` for `snapshot_name`, or ``None`` on error."""
        try:
            snapshot = self.compute_client.snapshots.get(
                self.resource_group, snapshot_name)
            return Snapshot(snapshot.name, snapshot.disk_size_gb,
                            snapshot.provisioning_state)
        except Exception as error:
            self.logger.error(
                '[Azure] ERROR: Unable to find or access snapshot {}.\n{}'.
                format(snapshot_name, error))
            return None

    def get_volume(self, volume_name):
        """Return a ``Volume`` for the disk `volume_name`, or ``None``."""
        try:
            volume = self.compute_client.disks.get(self.resource_group,
                                                   volume_name)
            return Volume(volume.name, volume.provisioning_state,
                          volume.disk_size_gb)
        except Exception as error:
            self.logger.error(
                '[Azure] ERROR: Unable to find or access volume/disk {}.\n{}'.
                format(volume_name, error))
            return None

    def get_host_number_of_data_volumes(self):
        '''
        This function is specific to Azure. It determines the SCSI host
        number of the persistent disk. The host number together with the
        LUN is required to find the device in a deterministic way.

        Returns the host number as a string, or None if it could not be
        determined (the error is logged).
        '''
        host_number = None
        try:
            # First field of the matching mounts line with its first five
            # characters and its last character removed
            # (presumably '/dev/sdc1' -> 'sdc'; confirm against FILE_MOUNTS).
            device_persistent_volume = self.shell('cat {} | grep {}'.format(
                self.FILE_MOUNTS,
                self.DIRECTORY_PERSISTENT)).split(' ')[0][5:-1]
            device_paths = glob.glob(
                '/sys/bus/scsi/devices/*:*:*:*/block/{}'.format(
                    device_persistent_volume))
            # Exactly one path is expected; an empty result is rejected here
            # explicitly instead of surfacing later as an IndexError.
            if len(device_paths) != 1:
                raise Exception('Invalid device paths for device {}'.format(
                    device_persistent_volume))
            # Success: Got only one device path.
            # 22 == len('/sys/bus/scsi/devices/'); what remains is
            # 'H:C:T:L', whose first component is the host number.
            host_number = device_paths[0][22:-len('/block/{}'.format(
                device_persistent_volume))].split(':')[0]
        except Exception as error:
            self.logger.error(
                '[ERROR] [SCSI HOST NUMBER] [DATA VOLUME] Error while determining SCSI host number '
                'of persistent volume directory {}.{}'.format(
                    self.DIRECTORY_PERSISTENT, error))
        return host_number

    def get_instance_location(self, instance_id):
        """Return the ``location`` of the VM, or ``None`` on any error."""
        try:
            instance = self.compute_client.virtual_machines.get(
                self.resource_group, instance_id)
            return instance.location
        except Exception as error:
            self.logger.error(
                '[Azure] ERROR: Unable to get location for instance_id {}.{}'.
                format(instance_id, error))
            return None

    def get_attached_volumes_for_instance(self, instance_id):
        """Return a ``Volume`` per data disk of the VM (``[]`` on error).

        Also refreshes ``self.availability_zones`` from the VM.
        """
        try:
            instance = self.compute_client.virtual_machines.get(
                self.resource_group, instance_id)
            self.availability_zones = instance.zones
            volume_list = []
            for disk in instance.storage_profile.data_disks:
                device = None
                device_path = glob.glob(
                    self.DEVICE_PATH_TEMPLATE.format(self.scsi_host_number,
                                                     disk.lun))
                if len(device_path) != 1:
                    raise Exception(
                        'Expected number of device path not matching 1 != {} fo lun {}'
                        .format(len(device_path), disk.lun))
                device = '/dev/{}'.format(
                    self.shell('ls {}'.format(device_path[0])).rstrip())
                volume_list.append(
                    Volume(disk.name, 'none', disk.disk_size_gb, device))
            return volume_list
        except Exception as error:
            self.logger.error(
                '[Azure] ERROR: Unable to find or access attached volume for instance_id {}.{}'
                .format(instance_id, error))
            return []

    def get_persistent_volume_for_instance(self, instance_id):
        """Return the attached volume backing the persistent directory.

        Returns ``None`` if no attached volume matches or on any error.
        """
        try:
            # First eight characters of the mounted device path
            # (presumably '/dev/sdX'; confirm).
            device = self.shell('cat {} | grep {}'.format(
                self.FILE_MOUNTS, self.DIRECTORY_PERSISTENT)).split(' ')[0][:8]
            for volume in self.get_attached_volumes_for_instance(instance_id):
                if volume.device == device:
                    self._add_volume_device(volume.id, device)
                    return volume
            return None
        except Exception as error:
            self.logger.error(
                '[ERROR] [GET PRESISTENT VOLUME] Unable to find persistent volume for instance {}.{}'
                .format(instance_id, error))
            return None

    def location_supports_zrs(self, location):
        """Return True if `location` is in the ZRS-supported region list."""
        return location in self.zrs_supported_regions

    def _create_snapshot(self, volume_id):
        """Snapshot the disk `volume_id` and return the ``Snapshot``.

        Uses the ``Standard_ZRS`` SKU where the disk's location supports it
        and ``Standard_LRS`` otherwise.

        :raises Exception: on any failure (after logging).
        """
        log_prefix = '[SNAPSHOT] [CREATE]'
        snapshot = None
        self.logger.info('{} START for volume id {} with tags {}'.format(
            log_prefix, volume_id, self.tags))
        try:
            disk_info = self.compute_client.disks.get(self.resource_group,
                                                      volume_id)
            snapshot_name = self.generate_name_by_prefix(self.SNAPSHOT_PREFIX)
            if self.location_supports_zrs(disk_info.location):
                sku_name = 'Standard_ZRS'
            else:
                sku_name = 'Standard_LRS'
            snapshot_creation_operation = self.compute_client.snapshots.create_or_update(
                self.resource_group, snapshot_name, {
                    'location': disk_info.location,
                    'tags': self.tags,
                    'creation_data': {
                        'create_option': DiskCreateOption.copy,
                        'source_uri': disk_info.id
                    },
                    'sku': {
                        'name': sku_name
                    }
                })

            self._wait(
                'Waiting for snapshot {} to get ready...'.format(
                    snapshot_name),
                lambda operation: operation.done() is True, None,
                snapshot_creation_operation)

            snapshot_info = snapshot_creation_operation.result()
            self.logger.info(
                'Snapshot creation response: {}'.format(snapshot_info))
            snapshot = Snapshot(snapshot_info.name,
                                snapshot_info.disk_size_gb,
                                snapshot_info.provisioning_state)
            self._add_snapshot(snapshot.id)

            self.logger.info(
                '{} SUCCESS: snapshot-id={}, volume-id={} , tags={} '.format(
                    log_prefix, snapshot.id, volume_id, self.tags))
            self.output_json['snapshotId'] = snapshot.id
        except Exception as error:
            message = '{} ERROR: volume-id={}\n{}'.format(
                log_prefix, volume_id, error)
            self.logger.error(message)
            # Roll back a snapshot that was created before the failure.
            if snapshot:
                self.delete_snapshot(snapshot.id)
                snapshot = None
            raise Exception(message)

        return snapshot

    def _copy_snapshot(self, snapshot_id):
        """No copy is performed; simply look the snapshot up."""
        return self.get_snapshot(snapshot_id)

    def _delete_snapshot(self, snapshot_id):
        """Delete the snapshot and wait until it is gone.

        :raises Exception: on any failure (after logging).
        """
        log_prefix = '[SNAPSHOT] [DELETE]'
        try:
            snapshot_deletion_operation = self.compute_client.snapshots.delete(
                self.resource_group, snapshot_id)
            # TODO: can be implemented the following wait as 'operation.done() is True'
            self._wait(
                'Waiting for snapshot {} to be deleted...'.format(snapshot_id),
                lambda id: not self.get_snapshot(id), None, snapshot_id)
            snapshot_delete_response = snapshot_deletion_operation.result()
            self._remove_snapshot(snapshot_id)
            self.logger.info('{} SUCCESS: snapshot-id={}\n{}'.format(
                log_prefix, snapshot_id, snapshot_delete_response))
            return True
        except Exception as error:
            message = '{} ERROR: snapshot-id={}\n{}'.format(
                log_prefix, snapshot_id, error)
            self.logger.error(message)
            raise Exception(message)

    def _create_volume(self, size, snapshot_id=None):
        """Create a disk, empty or from `snapshot_id`, and return it.

        :param size: size in GB; only used when no snapshot is given.
        :raises Exception: on any failure (after logging).
        """
        log_prefix = '[VOLUME] [CREATE]'
        volume = None
        try:
            disk_creation_operation = None
            disk_name = None
            if snapshot_id is not None:
                snapshot = self.compute_client.snapshots.get(
                    self.resource_group, snapshot_id)
                disk_name = self.generate_name_by_prefix(self.DISK_PREFIX)
                disk_creation_operation = self.compute_client.disks.create_or_update(
                    self.resource_group, disk_name, {
                        'location': self.instance_location,
                        'tags': self.tags,
                        'creation_data': {
                            'create_option': DiskCreateOption.copy,
                            'source_uri': snapshot.id
                        },
                        'zones': self.availability_zones
                    })
            else:
                disk_name = self.generate_name_by_prefix(self.DISK_PREFIX)
                disk_creation_operation = self.compute_client.disks.create_or_update(
                    self.resource_group, disk_name, {
                        'location': self.instance_location,
                        'tags': self.tags,
                        'disk_size_gb': size,
                        'creation_data': {
                            'create_option': DiskCreateOption.empty
                        },
                        'account_type': StorageAccountTypes.standard_lrs,
                        'zones': self.availability_zones
                    })

            self._wait(
                'Waiting for volume {} to get ready...'.format(disk_name),
                lambda operation: operation.done() is True, None,
                disk_creation_operation)

            disk = disk_creation_operation.result()
            volume = Volume(disk.name, 'none', disk.disk_size_gb)
            self._add_volume(volume.id)
            self.logger.info('{} SUCCESS: volume-id={} with tags={} '.format(
                log_prefix, volume.id, self.tags))
        except Exception as error:
            message = '{} ERROR: size={}\n{}'.format(log_prefix, size, error)
            self.logger.error(message)
            # Roll back a disk that was created before the failure.
            if volume:
                self.delete_volume(volume.id)
                volume = None
            raise Exception(message)

        return volume

    def _delete_volume(self, volume_id):
        """Delete the disk and wait for the operation to finish.

        :raises Exception: on any failure (after logging).
        """
        log_prefix = '[VOLUME] [DELETE]'
        try:
            disk_deletion_operation = self.compute_client.disks.delete(
                self.resource_group, volume_id)
            self._wait(
                'Waiting for volume {} to be deleted...'.format(volume_id),
                lambda operation: operation.done() is True, None,
                disk_deletion_operation)
            delete_response = disk_deletion_operation.result()
            self._remove_volume(volume_id)
            self.logger.info(
                '{} SUCCESS: volume-id={} with tags={}\n{}'.format(
                    log_prefix, volume_id, self.tags, delete_response))
            return True
        except Exception as error:
            message = '{} ERROR: volume-id={}\n{}'.format(
                log_prefix, volume_id, error)
            self.logger.error(message)
            raise Exception(message)

    def _create_attachment(self, volume_id, instance_id):
        """Attach disk `volume_id` to the VM on the lowest free LUN.

        :raises Exception: on any failure; if the disk turns out to be
            attached anyway, a detachment is triggered first.
        """
        log_prefix = '[ATTACHMENT] [CREATE]'
        attachment = None
        try:
            virtual_machine = self.compute_client.virtual_machines.get(
                self.resource_group, instance_id)
            volume = self.compute_client.disks.get(self.resource_group,
                                                   volume_id)
            all_data_disks = virtual_machine.storage_profile.data_disks

            # Pick the lowest LUN not used by any attached disk. Collecting
            # the used LUNs first keeps this correct even when the disks are
            # not listed in LUN order.
            used_luns = set(disk.lun for disk in all_data_disks)
            next_lun = 0
            while next_lun in used_luns:
                next_lun += 1

            existing_devices_path = glob.glob(
                self.DEVICE_PATH_TEMPLATE.format(self.scsi_host_number,
                                                 next_lun))
            virtual_machine.storage_profile.data_disks.append({
                'lun': next_lun,
                'name': volume.name,
                'create_option': DiskCreateOptionTypes.attach,
                'managed_disk': {
                    'id': volume.id
                }
            })

            disk_attach_operation = self.compute_client.virtual_machines.create_or_update(
                self.resource_group, virtual_machine.name, virtual_machine)
            self._wait(
                'Waiting for attachment of volume {} to get ready...'.format(
                    volume_id),
                lambda operation: operation.done() is True, None,
                disk_attach_operation)
            updated_vm = disk_attach_operation.result()

            all_devices_path = glob.glob(
                self.DEVICE_PATH_TEMPLATE.format(self.scsi_host_number,
                                                 next_lun))
            new_devices_path = list(
                set(all_devices_path) - set(existing_devices_path))
            if len(new_devices_path) > 1:
                raise Exception(
                    'Found more than one new devices while attaching volume!')
            # Fail with a meaningful message instead of 'list index out of
            # range' when the device did not show up.
            if not new_devices_path:
                raise Exception(
                    'Found no new device while attaching volume!')

            device = '/dev/{}'.format(
                self.shell('ls {}'.format(new_devices_path[0])).rstrip())
            self._add_volume_device(volume_id, device)
            attachment = Attachment(0, volume_id, instance_id)
            self._add_attachment(volume_id, instance_id)
            self.logger.info(
                '{} SUCCESS: volume-id={}, instance-id={}\n Updated vm:{}'.
                format(log_prefix, volume_id, instance_id, updated_vm))
        except Exception as error:
            message = '{} ERROR: volume-id={}, instance-id={}\n{}'.format(
                log_prefix, volume_id, instance_id, error)
            self.logger.error(message)

            # The following lines are a workaround in case of inconsistency:
            # The attachment process may end with throwing an Exception,
            # but the attachment has been successful. Therefore, we must
            # check whether the volume is attached and if yes, trigger the
            # detachment
            # TODO : Following need to take care: volume.status is not in-use in case of Azure
            volume = self.compute_client.disks.get(self.resource_group,
                                                   volume_id)
            if volume.managed_by is not None:
                self.logger.warning(
                    '[VOLUME] [DELETE] Volume is attached although the attaching process failed, '
                    'triggering detachment')
                attachment = True

            if attachment:
                self.delete_attachment(volume_id, instance_id)
                attachment = None
            raise Exception(message)

        return attachment

    def _delete_attachment(self, volume_id, instance_id):
        """Detach disk `volume_id` from the VM and wait for completion.

        :raises Exception: on any failure (after logging).
        """
        log_prefix = '[ATTACHMENT] [DELETE]'
        try:
            virtual_machine = self.compute_client.virtual_machines.get(
                self.resource_group, instance_id)
            data_disks = virtual_machine.storage_profile.data_disks
            # In-place slice assignment so the VM model itself is updated.
            data_disks[:] = [
                disk for disk in data_disks if disk.name != volume_id
            ]
            disk_detach_operation = self.compute_client.virtual_machines.create_or_update(
                self.resource_group, virtual_machine.name, virtual_machine)
            self._wait(
                'Waiting for attachment of volume {} to be removed...'.format(
                    volume_id),
                lambda operation: operation.done() is True, None,
                disk_detach_operation)
            updated_vm = disk_detach_operation.result()
            self._remove_volume_device(volume_id)
            self._remove_attachment(volume_id, instance_id)
            self.logger.info(
                '{} SUCCESS: volume-id={}, instance-id={}\n updated vm: {}'.
                format(log_prefix, volume_id, instance_id, updated_vm))
            return True
        except Exception as error:
            message = '{} ERROR: volume-id={}, instance-id={}\n{}'.format(
                log_prefix, volume_id, instance_id, error)
            self.logger.error(message)
            raise Exception(message)

    def _find_volume_device(self, volume_id):
        # Nothing to do for Azure as the device name is specified manually
        # while attaching a volume and therefore known
        pass

    def get_mountpoint(self, volume_id, partition=None):
        """Return the device of `volume_id`, with `partition` appended."""
        device = self._get_device_of_volume(volume_id)
        if not device:
            return None
        if partition:
            device += partition
        return device

    def _upload_to_blobstore(self, blob_to_upload_path, blob_target_name,
                             max_connections=2):
        """Upload a local file to ``CONTAINER`` as `blob_target_name`.

        :raises Exception: on any failure (after logging).
        """
        log_prefix = '[AZURE STORAGE CONTAINER] [UPLOAD]'
        self.logger.info(
            '{} Started to upload the tarball to the object storage.'.format(
                log_prefix))
        try:
            self.block_blob_service.MAX_BLOCK_SIZE = self.max_block_size
            self.block_blob_service.create_blob_from_path(
                self.CONTAINER,
                blob_target_name,
                blob_to_upload_path,
                max_connections=max_connections)
            # TODO: need to check above 'blob_target_name'
            self.logger.info(
                '{} SUCCESS: blob_to_upload={}, blob_target_name={}, container={}'
                .format(log_prefix, blob_to_upload_path, blob_target_name,
                        self.CONTAINER))
            return True
        except Exception as error:
            message = '{} ERROR: blob_to_upload={}, blob_target_name={}, container={}\n{}'.format(
                log_prefix, blob_to_upload_path, blob_target_name,
                self.CONTAINER, error)
            self.logger.error(message)
            raise Exception(message)

    def _download_from_blobstore(self, blob_to_download_name,
                                 blob_download_target_path,
                                 max_connections=2):
        """Download a blob from ``CONTAINER`` to a local path.

        :raises Exception: on any failure (after logging).
        """
        log_prefix = '[AZURE STORAGE CONTAINER] [DOWNLOAD]'
        self.logger.info(
            '{} Started to download the tarball to target {}.'.format(
                log_prefix, blob_download_target_path))
        try:
            self.block_blob_service.MAX_BLOCK_SIZE = self.max_block_size
            self.block_blob_service.get_blob_to_path(
                self.CONTAINER,
                blob_to_download_name,
                blob_download_target_path,
                max_connections=max_connections)
            # Argument order matches the placeholders (target path, then
            # container), consistent with the error message below.
            self.logger.info(
                '{} SUCCESS: blob_to_download={}, blob_target_name={}, container={}'
                .format(log_prefix, blob_to_download_name,
                        blob_download_target_path, self.CONTAINER))
            return True
        except Exception as error:
            message = '{} ERROR: blob_to_download={}, blob_target_name={}, container={}\n{}'.format(
                log_prefix, blob_to_download_name, blob_download_target_path,
                self.CONTAINER, error)
            self.logger.error(message)
            raise Exception(message)

    def _download_from_blobstore_and_pipe_to_process(self, process,
                                                     blob_to_download_name,
                                                     segment_size):
        """Stream the first `segment_size` bytes of a blob to process.stdin."""
        self.block_blob_service.get_blob_to_stream(
            self.CONTAINER,
            blob_to_download_name,
            process.stdin,
            snapshot=None,
            start_range=0,
            end_range=segment_size - 1)
        return True
import os
import time
import src.camera_helper as ch
from dotenv import load_dotenv
# Load the .env file before the Azure settings are read with os.getenv below.
load_dotenv()
from flask import Flask
from flask import request
from flask import jsonify
app = Flask(__name__)
from azure.storage.blob import BlockBlobService
block_blob_service = BlockBlobService(os.getenv("AZURE_BLOB_ACCOUNT_NAME"), os.getenv("AZURE_BLOB_ACCOUNT_KEY"))
container_name = os.getenv("AZURE_BLOB_CONTAINER_NAME")

# `have_camera` stays False (and `camera` stays 0) unless the import, the
# camera construction and the warm-up capture below all succeed.
have_camera = False
camera = 0
try:
    from picamera import PiCamera
    print("imported")
    camera = PiCamera(resolution=(3280, 2464))
    print("set camera - taking warmup")
    full_path = './pictures/startup.png'
    print("Taking the picture")
    camera.capture(full_path)
    have_camera = True
except Exception:
    # Best-effort: run without a camera. `Exception` (rather than a bare
    # except) lets KeyboardInterrupt/SystemExit propagate during start-up.
    print("No camera module")
if __name__ == '__main__': storage_account = sys.argv[1] storage_key = sys.argv[2] batch_account = sys.argv[3] batch_key = sys.argv[4] batch_url = sys.argv[5] table_name = sys.argv[6] job_id = sys.argv[7] entity_pk = sys.argv[8] entity_rk = sys.argv[9] table_service = TableService(account_name=storage_account, account_key=storage_key) blob_service = BlockBlobService(account_name=storage_account, account_key=storage_key) credentials = batchauth.SharedKeyCredentials(batch_account, batch_key) batch_client = batch.BatchServiceClient(credentials, base_url=batch_url) entity = table_service.get_entity(table_name, entity_pk, entity_rk) wait_for_tasks_to_complete(table_service, batch_client, table_name, entity, job_id) if table_name == 'DatabaseEntity': container_name = sys.argv[10] files = 0 total_size = 0 db_type = 'Nucleotide' generator = blob_service.list_blobs(container_name, prefix=entity_rk + '.') for blob in generator:
class RemoteAzure(RemoteBase):
    """Remote backed by an Azure Blob Storage container.

    The container name and connection string are resolved, in order, from
    the remote URL / config, the legacy ``ContainerName=...;<connection
    string>`` URL form, and the ``AZURE_STORAGE_CONTAINER_NAME`` /
    ``AZURE_STORAGE_CONNECTION_STRING`` environment variables.
    """

    scheme = "azure"
    REGEX = (
        r"azure://((?P<path>[^=;]*)?|("
        # backward compatibility
        r"(ContainerName=(?P<container_name>[^;]+);?)?"
        r"(?P<connection_string>.+)?)?)$"
    )
    REQUIRES = {"azure-storage-blob": BlockBlobService}
    PARAM_CHECKSUM = "etag"
    COPY_POLL_SECONDS = 5

    def __init__(self, repo, config):
        """Resolve bucket, prefix and connection string from ``config``.

        Raises:
            ValueError: if no container name or no connection string can be
                determined.
        """
        super(RemoteAzure, self).__init__(repo, config)

        self.url = config.get(Config.SECTION_REMOTE_URL, "azure://")
        match = re.match(self.REGEX, self.url)  # backward compatibility

        path = match.group("path")
        self.bucket = (
            urlparse(self.url if path else "").netloc
            or match.group("container_name")  # backward compatibility
            or os.getenv("AZURE_STORAGE_CONTAINER_NAME")
        )

        self.prefix = urlparse(self.url).path.lstrip("/") if path else ""

        self.connection_string = (
            config.get(Config.SECTION_AZURE_CONNECTION_STRING)
            or match.group("connection_string")  # backward compatibility
            or os.getenv("AZURE_STORAGE_CONNECTION_STRING")
        )

        if not self.bucket:
            raise ValueError("azure storage container name missing")

        if not self.connection_string:
            raise ValueError("azure storage connection string missing")

        self.__blob_service = None
        self.path_info = {"scheme": self.scheme, "bucket": self.bucket}

    @property
    def blob_service(self):
        """Lazily create the ``BlockBlobService`` and ensure the container exists."""
        if self.__blob_service is None:
            logger.debug("URL {}".format(self.url))
            # The connection string is deliberately NOT logged: it carries
            # the storage account credentials.
            self.__blob_service = BlockBlobService(
                connection_string=self.connection_string
            )
            logger.debug("Container name {}".format(self.bucket))
            try:  # verify that container exists
                self.__blob_service.list_blobs(
                    self.bucket, delimiter="/", num_results=1
                )
            except AzureMissingResourceHttpError:
                self.__blob_service.create_container(self.bucket)
        return self.__blob_service

    def remove(self, path_info):
        """Delete the blob described by ``path_info``.

        Raises:
            NotImplementedError: if ``path_info`` is not an azure path.
        """
        if path_info["scheme"] != self.scheme:
            raise NotImplementedError

        logger.debug(
            "Removing azure://{}/{}".format(
                path_info["bucket"], path_info["path"]
            )
        )

        self.blob_service.delete_blob(path_info["bucket"], path_info["path"])

    def _list_paths(self, bucket, prefix):
        """Yield the names of all blobs under ``prefix``, following markers."""
        blob_service = self.blob_service
        next_marker = None
        while True:
            blobs = blob_service.list_blobs(
                bucket, prefix=prefix, marker=next_marker
            )

            for blob in blobs:
                yield blob.name

            # An empty marker means the listing is exhausted.
            if not blobs.next_marker:
                break

            next_marker = blobs.next_marker

    def list_cache_paths(self):
        """Yield every blob name under this remote's bucket and prefix."""
        return self._list_paths(self.bucket, self.prefix)

    def upload(self, from_infos, to_infos, names=None):
        """Upload local files to azure paths, pairwise.

        A failed upload is logged as a warning and the loop continues with
        the next pair.

        Raises:
            NotImplementedError: if a destination is not an azure path or a
                source is not a local path.
        """
        names = self._verify_path_args(to_infos, from_infos, names)

        for from_info, to_info, name in zip(from_infos, to_infos, names):
            if to_info["scheme"] != self.scheme:
                raise NotImplementedError

            if from_info["scheme"] != "local":
                raise NotImplementedError

            bucket = to_info["bucket"]
            path = to_info["path"]

            logger.debug(
                "Uploading '{}' to '{}/{}'".format(
                    from_info["path"], bucket, path
                )
            )

            if not name:
                name = os.path.basename(from_info["path"])

            cb = Callback(name)

            try:
                self.blob_service.create_blob_from_path(
                    bucket, path, from_info["path"], progress_callback=cb
                )
            except Exception:
                msg = "failed to upload '{}'".format(from_info["path"])
                logger.warning(msg)
            else:
                progress.finish_target(name)

    def download(
        self,
        from_infos,
        to_infos,
        no_progress_bar=False,
        names=None,
        resume=False,
    ):
        """Download azure blobs to local paths, pairwise.

        Each blob is fetched into a temporary file and moved into place only
        after a successful download. A failed download is logged as a warning
        and the loop continues. ``resume`` is accepted but not used here.

        Raises:
            NotImplementedError: if a source is not an azure path or a
                destination is not a local path.
        """
        names = self._verify_path_args(from_infos, to_infos, names)

        for to_info, from_info, name in zip(to_infos, from_infos, names):
            if from_info["scheme"] != self.scheme:
                raise NotImplementedError

            if to_info["scheme"] != "local":
                raise NotImplementedError

            bucket = from_info["bucket"]
            path = from_info["path"]

            logger.debug(
                "Downloading '{}/{}' to '{}'".format(
                    bucket, path, to_info["path"]
                )
            )

            tmp_file = tmp_fname(to_info["path"])
            if not name:
                name = os.path.basename(to_info["path"])

            cb = None if no_progress_bar else Callback(name)

            makedirs(os.path.dirname(to_info["path"]), exist_ok=True)

            try:
                self.blob_service.get_blob_to_path(
                    bucket, path, tmp_file, progress_callback=cb
                )
            except Exception:
                msg = "failed to download '{}/{}'".format(bucket, path)
                logger.warning(msg)
            else:
                move(tmp_file, to_info["path"])

                if not no_progress_bar:
                    progress.finish_target(name)
if __name__ == '__main__':
    # Three positional arguments are required: <joined_data> <start_model> <num_models>.
    if len(sys.argv) < 4:
        print(
            "Start and end dates are expected. Example: python {0} <joined_data> <start_model> <num_models>"
            .format(sys.argv[0]))
        # Stop here: continuing would only fail with IndexError on sys.argv below.
        sys.exit(1)

    joined_data = sys.argv[1]
    start_model = sys.argv[2]
    num_models = int(sys.argv[3])

    # Storage credentials and cache location come from the [DecisionService]
    # section of ds.config in the current working directory.
    config = configparser.ConfigParser()
    config.read('ds.config')
    ds = config['DecisionService']
    cache_folder = ds['CacheFolder']
    block_blob_service = BlockBlobService(
        account_name=ds['AzureBlobStorageAccountName'],
        account_key=ds['AzureBlobStorageAccountKey'])

    joined_data_index = {}

    # index joined data: map each event id to the file offset of its line.
    # readline()/tell() are used instead of `for line in f` because tell()
    # is not available while iterating a text file.
    with open(joined_data, 'r', encoding='utf8') as f:
        pos = f.tell()
        line = f.readline()
        while len(line) != 0:
            evt = json.loads(line)
            if '_eventid' in evt:
                joined_data_index[evt['_eventid']] = pos
            pos = f.tell()
            line = f.readline()
class Blob:
    """
    Thin storage facade: Azure blob storage in production, local disk otherwise.

    Args:
        app: The current Flask app. If omitted, `init_app` has to be called
            before the object is used.

    Returns:
        A `Blob` object.
    """

    # Container holding the profile pictures.
    PROFILE_PICTURE_CONTAINER = 'profile-picture'

    def __init__(self, app=None):
        if app:
            self.init_app(app)

    def init_app(self, app):
        """
        Binds the object to a Flask app and, in production, connects to Azure.

        Args:
            app: The currently running Flask app.
        """
        self.app = app
        self._is_production = self.app.config['PRODUCTION']
        if not self._is_production:
            return

        settings = self.app.config['BLOBSTORE']
        self._service = BlockBlobService(account_name=settings['ACCOUNT'],
                                         account_key=settings['ACCOUNT_KEY'])
        # Make sure the profile picture container is there.
        if not self._service.exists(self.PROFILE_PICTURE_CONTAINER):
            self._service.create_container(self.PROFILE_PICTURE_CONTAINER)

    def create_blob_from_bytes(self, container, name, byte_array):
        """
        Stores `byte_array` as a blob in production, or as a local file otherwise.

        Args:
            container: The azure blob service container.
            name: The name of the blob.
            byte_array: The data to be put in the blob.
        """
        name = str(name)
        if self._is_production:
            self._service.create_blob_from_bytes(container, name, bytes(byte_array))
            return

        # Dev/testing path: the data goes to a file on disk instead.
        target = make_file_name(container, name)
        parent = os.path.dirname(target)
        if not os.path.exists(parent):
            os.makedirs(parent)
        with open(target, "wb") as out:
            out.write(byte_array)

    def delete_blob(self, container, name):
        """
        Removes the blob in production, or the local file otherwise.

        Args:
            container: The container which contains the blob.
            name: The name of the blob in the container.
        """
        name = str(name)
        if self._is_production:
            self._service.delete_blob(container, name)
            return

        target = make_file_name(container, name)
        try:
            os.remove(target)
        except OSError:
            # Removal failures on disk are ignored.
            pass

    def exists(self, container, name):
        """
        Tells whether the blob (production) or the local file (otherwise) exists.

        Args:
            container: The container where the blob resides.
            name: The name of the blob.

        Returns:
            bool: Whether the blob exists.
        """
        name = str(name)
        if not self._is_production:
            return os.path.isfile(make_file_name(container, name))
        return self._service.exists(container, name)

    def get_blob_to_bytes(self, container, name):
        """
        Reads the blob (production) or the local file (otherwise).

        Args:
            container: The container where the blob resides.
            name: The name of the blob.

        Returns:
            bytearray: The stored data, or None when running locally and the
            file does not exist.
        """
        name = str(name)
        if self._is_production:
            remote = self._service.get_blob_to_bytes(container, name)
            return bytearray(remote.content)

        if not self.exists(container, name):
            return None

        with open(make_file_name(container, name), "rb") as src:
            return bytearray(src.read())
#created a quickstart.py file to create container, blobs and test files in the blob . Wrote "Hello World" in that test file using this python program import os import uuid import random from azure.storage.blob import BlobServiceClient from azure.storage.blob import BlockBlobService from azure.storage.blob import BlobClient from azure.storage.blob import ContainerClient try: #Accessing Azure account using Account name and Account key block_blob_service = BlockBlobService( account_name='mcloudmesh', account_key= 'yo86DzS1cZaV1DHzFyjpMkwIeW2a4LbSnQREJTRdTstjaLrOubU5iaDCmuiX7xsF5jcI1iNWFpLpquA6mu1T+w==' ) print("Azure Blob storage v12 - Python quickstart sample") connect_str = os.getenv('AZURE_STORAGE_CONNECTION_STRING') blob_service_client = BlobServiceClient.from_connection_string(connect_str) container_name = "quickstart3" + str(uuid.uuid4()) container_client = blob_service_client.create_container(container_name) # Create a file in local Documents directory to upload and download local_path = "c:/users/hp/quickstart/data" local_file_name = "quickstart3" + str(uuid.uuid4()) + ".txt" upload_file_path = os.path.join(local_path, local_file_name) # Write text to the file
def start_storage(first_run):
    """Authenticate against Azure Storage and hand the containers to ``get_blobs``.

    Reads ``last_dates.json`` next to this file, builds a ``BlockBlobService``
    from either ``args.storage_auth_path`` or ``args.account_name`` /
    ``args.account_key``, selects the containers (all of them when
    ``container_format`` is ``'*'``, otherwise just that one) and calls
    ``get_blobs``. Failures are logged rather than raised.

    :param first_run: forwarded unchanged to ``get_blobs``.
    """
    logging.info("Azure Storage starting.")
    current_path = dirname(abspath(__file__))
    path = "{}/last_dates.json".format(current_path)
    storage_time = format_date(args.storage_time_offset)
    # Drop the last 7 characters of the textual date before parsing it —
    # presumably the fractional-seconds suffix; confirm against format_date.
    time_format = str(storage_time)
    length_time_format = len(time_format) - 7
    time_format = time_format[:length_time_format]
    time_format_storage = datetime.datetime.strptime(time_format,
                                                     '%Y-%m-%d %H:%M:%S')

    try:
        # Context manager so the file handle is closed even if parsing fails.
        with open(path) as dates_file:
            all_dates = json.load(dates_file)
    except Exception as e:
        logging.error(
            "Error: The file of the last dates could not be updated: '{}.".
            format(e))

    try:
        # Authentication
        logging.info("Storage: Authenticating.")
        if args.storage_auth_path:
            auth_fields = read_auth_path(args.storage_auth_path)
            block_blob_service = BlockBlobService(
                account_name=auth_fields['id'],
                account_key=auth_fields['key'])
            logging.info("Storage: Authenticated.")
        elif args.account_name and args.account_key:
            block_blob_service = BlockBlobService(
                account_name=args.account_name,
                account_key=args.account_key)
            logging.info("Storage: Authenticated.")
        else:
            logging.error(
                "Storage: No parameters have been provided for authentication."
            )

        logging.info("Storage: Getting containers.")
        # Getting containers from the storage account
        if container_format == '*':
            try:
                containers = block_blob_service.list_containers()
            except Exception as e:
                logging.error(
                    "Storage: The containers could not be obtained. '{}'.".
                    format(e))
        # Getting containers from the configuration file
        else:
            try:
                containers = [container_format]
            except Exception as e:
                logging.error(
                    "Storage: The containers could not be obtained. '{}'.".
                    format(e))

        # Getting blobs
        # NOTE: if an earlier step failed, the names used here may be unbound;
        # the resulting NameError is logged by the handler below.
        get_blobs(containers, block_blob_service, time_format_storage,
                  first_run, all_dates, path)

    except Exception as e:
        logging.error(" Storage account: '{}'.".format(e))

    logging.info("Storage: End")
# Re-open the workspace with service principal credentials.
svc_pr = ServicePrincipalAuthentication(
    tenant_id=tenant_id,
    service_principal_id=service_principal_id,
    service_principal_password=service_principal_password,
)
ws = Workspace(ws.subscription_id, ws.resource_group, ws.name, auth=svc_pr)
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep="\n")

def_blob_store = ws.get_default_datastore()
print("Blobstore's name: {}".format(def_blob_store.name))

# Collect the distinct dataset names found in the default blob store.
print("Checking for new datasets")
blob_service = BlockBlobService(
    def_blob_store.account_name, def_blob_store.account_key
)
generator = blob_service.list_blobs(
    def_blob_store.container_name, prefix="prednet/data/raw_data"
)
datasets = []
for blob in generator:
    # The dataset name is the fourth '/'-separated component of the blob name.
    dataset = blob.name.split("/")[3]
    if dataset in datasets:
        continue
    if not dataset.startswith("UCSD") or dataset.endswith("txt"):
        continue
    datasets.append(dataset)
    print("Found dataset:", dataset)

# Fetch every published pipeline in the workspace.
all_pub_pipelines = PublishedPipeline.list(ws)

# Build the lists of datasets that already have a published pipeline (old)
# and those that do not yet (new).
def __init__(self, account_name, account_key, container_name):
    """Remember the container name and open a BlockBlobService for the account."""
    service = BlockBlobService(account_name=account_name,
                               account_key=account_key)
    self.container_name = container_name
    self.blob_service = service
def download_container(app_id,
                       log_dir,
                       container=None,
                       conn_string=None,
                       account_name=None,
                       sas_token=None,
                       start_date=None,
                       end_date=None,
                       overwrite_mode=0,
                       dry_run=False,
                       version=2,
                       verbose=False,
                       create_gzip_mode=-1,
                       delta_mod_t=3600,
                       max_connections=4,
                       confirm=False,
                       report_progress=True,
                       if_match=None,
                       keep_invalid_eof=False,
                       max_download_size=None):
    """Download the logs of an app from Azure Storage into ``log_dir/app_id``.

    Args:
        app_id: application id; also the container name when ``container``
            is not given, and the sub-folder of ``log_dir`` that is written.
        log_dir: base output directory.
        container: container name (defaults to ``app_id``).
        conn_string: Azure Storage connection string.
        account_name, sas_token: used instead of ``conn_string`` for
            ``version != 1`` when both are given.
        start_date, end_date: inclusive day range; required for
            ``version == 1``, optional blob filters otherwise.
        overwrite_mode: how an existing local file is treated.
            0 skips it; 1 asks before overwriting when the size differs;
            3 and 4 skip when the local file is larger, otherwise validate
            the local tail and append the remainder. Values below 2 prompt
            for blobs modified within ``delta_mod_t`` seconds, 4 skips them.
        dry_run: report what would happen without downloading or writing.
        version: 1 requests the log through an HTTP service; any other value
            uses ``BlockBlobService``.
        verbose: also print the reason for every skipped blob.
        create_gzip_mode: -1 disables archive creation; 0 creates one
            ``.json.gz`` per model prefix; 1 merges one file per day into one
            archive; 2 merges all files, keeping per event id the line with
            the lowest cost, sorted by timestamp.
        delta_mod_t: seconds within which a blob counts as "in use".
        max_connections: parallel connections; forced to 1 when ``if_match``
            is not ``'*'``.
        confirm: ask before each blob download.
        report_progress: pass ``update_progress`` as progress callback.
        if_match: forwarded to ``get_blob_to_path``.
        keep_invalid_eof: when false, ``erase_invalid_end_line`` is applied
            to each downloaded file.
        max_download_size: forwarded as ``end_range``; also gates resuming.

    Returns:
        The last output path computed (the version-1 JSON file, or the last
        gzip archive), or ``None`` when neither applies.
    """
    t_start = time.time()
    if not container:
        container = app_id

    print('------' * 10)
    print('Current UTC time: {}'.format(
        datetime.datetime.now(datetime.timezone.utc)))
    print('app_id: {}'.format(app_id))
    print('container: {}'.format(container))
    print('log_dir: {}'.format(log_dir))
    print('Start Date: {}'.format(start_date))
    print('End Date: {}'.format(end_date))
    print('Overwrite mode: {}'.format(overwrite_mode))
    print('dry_run: {}'.format(dry_run))
    print('version: {}'.format(version))
    print('create_gzip_mode: {}'.format(create_gzip_mode))
    print('------' * 10)

    if not dry_run:
        os.makedirs(os.path.join(log_dir, app_id), exist_ok=True)

    output_fp = None
    if version == 1:  # using C# api for uncooked logs
        output_fp = os.path.join(
            log_dir, app_id,
            app_id + '_' + start_date.strftime("%Y-%m-%d") + '_' +
            end_date.strftime("%Y-%m-%d") + '.json')
        print('Destination: {}'.format(output_fp))
        do_download = True
        if os.path.isfile(output_fp):
            if overwrite_mode in {0, 3, 4}:
                print('Output file already exits. Not downloading'.format(
                    output_fp))
                do_download = False
            elif overwrite_mode == 1 and input(
                    'Output file already exits. Do you want to overwrite [Y/n]? '
                    .format(output_fp)) not in {'Y', 'y'}:
                do_download = False

        if do_download:
            if dry_run:
                print('--dry_run - Not downloading!')
            else:
                print('Downloading...', end='')
                try:
                    import requests
                    LogDownloaderURL = "https://cps-staging-exp-experimentation.azurewebsites.net/api/Log?account={ACCOUNT_NAME}&key={ACCOUNT_KEY}&start={START_DATE}&end={END_DATE}&container={CONTAINER}"
                    conn_string_dict = dict(
                        x.split('=', 1) for x in conn_string.split(';'))
                    if not conn_string_dict['AccountName'] or len(
                            conn_string_dict['AccountKey']) != 88:
                        print("Error: Invalid Azure Storage ConnectionString.")
                        sys.exit()
                    url = LogDownloaderURL.format(
                        ACCOUNT_NAME=conn_string_dict['AccountName'],
                        ACCOUNT_KEY=conn_string_dict['AccountKey'].replace(
                            '+', '%2b'),
                        CONTAINER=container,
                        START_DATE=start_date.strftime("%Y-%m-%d"),
                        END_DATE=(
                            end_date +
                            datetime.timedelta(days=1)).strftime("%Y-%m-%d"))
                    r = requests.post(url)
                    # Context manager so the handle is flushed and closed
                    # deterministically.
                    with open(output_fp, 'wb') as f_out:
                        f_out.write(r.content)
                    print(' Done!\n')
                except Exception as e:
                    print('Error: {}'.format(e))

    else:  # using BlockBlobService python api for cooked logs
        try:
            print(
                'Establishing Azure Storage BlockBlobService connection using ',
                end='')
            if sas_token and account_name:
                print('sas token...')
                bbs = BlockBlobService(account_name=account_name,
                                       sas_token=sas_token)
            else:
                print('connection string...')
                bbs = BlockBlobService(connection_string=conn_string)
            # List all blobs and download them one by one
            print('Getting blobs list...')
            blobs = bbs.list_blobs(container)
        except Exception as e:
            # e.args may be empty; indexing it unguarded would raise
            # IndexError from inside this handler.
            first_arg = e.args[0] if e.args else None
            if isinstance(first_arg, str) and first_arg.startswith(
                    'The specified container does not exist.'):
                print("Error: The specified container ({}) does not exist.".
                      format(container))
            else:
                print("Error:\nType: {}\nArgs: {}".format(
                    type(e).__name__, e.args))
            sys.exit()

        print('Iterating through blobs...\n')
        selected_fps = []
        for blob in blobs:
            if '/data/' not in blob.name:
                if verbose:
                    print('{} - Skip: Non-data blob\n'.format(blob.name))
                continue

            # The day is encoded in the blob name as .../data/YYYY/MM/DD_...
            blob_day = datetime.datetime.strptime(
                blob.name.split('/data/', 1)[1].split('_', 1)[0], '%Y/%m/%d')
            if (start_date and blob_day < start_date) or (
                    end_date and end_date < blob_day):
                if verbose:
                    print('{} - Skip: Outside of date range\n'.format(
                        blob.name))
                continue

            try:
                bp = bbs.get_blob_properties(container, blob.name)

                if confirm:
                    if input("{} - Do you want to download [Y/n]? ".format(
                            blob.name)) not in {'Y', 'y'}:
                        print()
                        continue

                fp = os.path.join(log_dir, app_id,
                                  blob.name.replace('/', '_'))
                selected_fps.append(fp)
                if os.path.isfile(fp):
                    file_size = os.path.getsize(fp)
                    if overwrite_mode == 0:
                        if verbose:
                            print('{} - Skip: Output file already exits\n'.
                                  format(blob.name))
                        continue
                    elif overwrite_mode in {1, 3, 4}:
                        if file_size == bp.properties.content_length:  # file size is the same, skip!
                            if verbose:
                                print(
                                    '{} - Skip: Output file already exits with same size\n'
                                    .format(blob.name))
                            continue
                        print(
                            'Output file already exits: {}\nLocal size: {:.3f} MB\nAzure size: {:.3f} MB'
                            .format(fp, file_size / (1024**2),
                                    bp.properties.content_length / (1024**2)))
                        if overwrite_mode in {
                                3, 4
                        } and file_size > bp.properties.content_length:  # local file size is larger, skip with warning!
                            print(
                                '{} - Skip: Output file already exits with larger size\n'
                                .format(blob.name))
                            continue
                        if overwrite_mode == 1 and input(
                                "Do you want to overwrite [Y/n]? ") not in {
                                    'Y', 'y'
                                }:
                            print()
                            continue
                else:
                    file_size = None

                print('Processing: {} (size: {:.3f}MB - Last modified: {})'.
                      format(blob.name,
                             bp.properties.content_length / (1024**2),
                             bp.properties.last_modified))
                # check if blob was modified in the last delta_mod_t sec
                if datetime.datetime.now(
                        datetime.timezone.utc
                ) - bp.properties.last_modified < datetime.timedelta(
                        0, delta_mod_t):
                    if overwrite_mode < 2:
                        if input(
                                "Azure blob currently in use (modified in the last delta_mod_t={} sec). Do you want to download anyway [Y/n]? "
                                .format(delta_mod_t)) not in {'Y', 'y'}:
                            print()
                            continue
                    elif overwrite_mode == 4:
                        print(
                            'Azure blob currently in use (modified in the last delta_mod_t={} sec). Skipping!\n'
                            .format(delta_mod_t))
                        continue
                if if_match != '*':  # when if_match is not '*', reset max_connections to 1 to prevent crash if azure blob is modified during download
                    max_connections = 1

                if dry_run:
                    print('--dry_run - Not downloading!')
                else:
                    t0 = time.time()
                    process_checker = update_progress if report_progress == True else None
                    if overwrite_mode in {3, 4} and file_size:
                        if max_download_size is None or file_size < max_download_size:
                            print('Check validity of remote file... ', end='')
                            temp_fp = fp + '.temp'
                            # Re-download (at most) the last 8 MiB that is
                            # already on disk and compare it with the local
                            # tail before appending anything.
                            cmpsize = min(file_size, 8 * 1024**2)
                            bbs.get_blob_to_path(
                                container,
                                blob.name,
                                temp_fp,
                                max_connections=max_connections,
                                start_range=file_size - cmpsize,
                                end_range=file_size - 1,
                                if_match=if_match)
                            if cmp_files(fp, temp_fp, -cmpsize):
                                print('Valid!')
                                print(
                                    'Resume downloading to temp file with max_connections = {}...'
                                    .format(max_connections))
                                bbs.get_blob_to_path(
                                    container,
                                    blob.name,
                                    temp_fp,
                                    progress_callback=process_checker,
                                    max_connections=max_connections,
                                    start_range=os.path.getsize(fp),
                                    if_match=if_match,
                                    end_range=max_download_size)
                                download_time = time.time() - t0
                                download_size_MB = os.path.getsize(temp_fp) / (
                                    1024**2)  # file size in MB
                                print('\nAppending to local file...')
                                with open(fp, 'ab') as f1, open(temp_fp,
                                                                'rb') as f2:
                                    shutil.copyfileobj(
                                        f2, f1, length=100 * 1024**2
                                    )  # writing chunks of 100MB to avoid consuming memory
                                print(
                                    'Appending completed. Deleting temp file...'
                                )
                                os.remove(temp_fp)
                            else:
                                os.remove(temp_fp)
                                print('Invalid! - Skip\n')
                                continue
                            print(
                                'Downloaded {:.3f} MB in {:.1f} sec. ({:.3f} MB/sec) - Total elapsed time: {:.1f} sec.'
                                .format(download_size_MB, download_time,
                                        download_size_MB / download_time,
                                        time.time() - t0))
                    else:
                        print(
                            'Downloading with max_connections = {}...'.format(
                                max_connections))
                        bbs.get_blob_to_path(container,
                                             blob.name,
                                             fp,
                                             progress_callback=process_checker,
                                             max_connections=max_connections,
                                             if_match=if_match,
                                             start_range=0,
                                             end_range=max_download_size)
                        download_time = time.time() - t0
                        download_size_MB = os.path.getsize(fp) / (
                            1024**2)  # file size in MB
                        print(
                            '\nDownloaded {:.3f} MB in {:.1f} sec. ({:.3f} MB/sec)'
                            .format(download_size_MB, download_time,
                                    download_size_MB / download_time))
                    if not keep_invalid_eof:
                        erase_invalid_end_line(fp)
                    print()
            except Exception as e:
                print('Error: {}'.format(e))

        if create_gzip_mode > -1:
            if selected_fps:
                # Keep only the files that are actually present on disk.
                selected_fps = [x for x in selected_fps if os.path.isfile(x)]
                if create_gzip_mode == 0:
                    # One archive per model: group by the part of the file
                    # name before '_data_'.
                    models = {}
                    for fp in selected_fps:
                        models.setdefault(
                            os.path.basename(fp).split('_data_', 1)[0],
                            []).append(fp)
                    for model in models:
                        models[model].sort(key=lambda x: list(
                            map(int,
                                x.split('_data_')[1].split('_')[:3])))
                        start_date = '-'.join(
                            models[model][0].split('_data_')[1].split('_')[:3])
                        end_date = '-'.join(models[model][-1].split('_data_')
                                            [1].split('_')[:3])
                        output_fp = os.path.join(
                            log_dir, app_id, app_id + '_' + model + '_data_' +
                            start_date + '_' + end_date + '.json.gz')
                        print(
                            'Concat and zip files of LastConfigurationEditDate={} to: {}'
                            .format(model, output_fp))
                        if os.path.isfile(
                                output_fp
                        ) and __name__ == '__main__' and input(
                                'Output file already exits. Do you want to overwrite [Y/n]? '
                                .format(output_fp)) not in {'Y', 'y'}:
                            continue
                        if dry_run:
                            print('--dry_run - Not downloading!')
                        else:
                            with gzip.open(output_fp, 'wb') as f_out:
                                for fp in models[model]:
                                    print('Adding: {}'.format(fp))
                                    with open(fp, 'rb') as f_in:
                                        shutil.copyfileobj(
                                            f_in, f_out, length=100 * 1024**2
                                        )  # writing chunks of 100MB to avoid consuming memory
                elif create_gzip_mode == 1:
                    # Sort by day, then by decreasing size, so the first file
                    # seen for each day is the largest one.
                    selected_fps.sort(key=lambda x: (list(
                        map(int,
                            x.split('_data_')[1].split('_')[:3])), -os.path.
                                                     getsize(x), x))
                    selected_fps_merged = []
                    last_fp_date = None
                    for fp in selected_fps:
                        fp_date = datetime.datetime.strptime(
                            '_'.join(fp.split('_data_')[1].split('_')[:3]),
                            "%Y_%m_%d")
                        if fp_date != last_fp_date:
                            selected_fps_merged.append(fp)
                            last_fp_date = fp_date

                    start_date = '-'.join(selected_fps_merged[0].split(
                        '_data_')[1].split('_')[:3])
                    end_date = '-'.join(selected_fps_merged[-1].split('_data_')
                                        [1].split('_')[:3])
                    output_fp = os.path.join(
                        log_dir, app_id, app_id + '_merged_data_' +
                        start_date + '_' + end_date + '.json.gz')
                    print(
                        'Merge and zip files of all LastConfigurationEditDate to: {}'
                        .format(output_fp))
                    if not os.path.isfile(
                            output_fp
                    ) or __name__ == '__main__' and input(
                            'Output file already exits. Do you want to overwrite [Y/n]? '
                            .format(output_fp)) in {'Y', 'y'}:
                        if dry_run:
                            for fp in selected_fps_merged:
                                print('Adding: {}'.format(fp))
                            print('--dry_run - Not downloading!')
                        else:
                            with gzip.open(output_fp, 'wb') as f_out:
                                for fp in selected_fps_merged:
                                    print('Adding: {}'.format(fp))
                                    with open(fp, 'rb') as f_in:
                                        shutil.copyfileobj(
                                            f_in, f_out, length=1024**3
                                        )  # writing chunks of 1GB to avoid consuming memory
                elif create_gzip_mode == 2:
                    selected_fps.sort(key=lambda x: (list(
                        map(int,
                            x.split('_data_')[1].split('_')[:3])), -os.path.
                                                     getsize(x), x))
                    start_date = '-'.join(
                        selected_fps[0].split('_data_')[1].split('_')[:3])
                    end_date = '-'.join(
                        selected_fps[-1].split('_data_')[1].split('_')[:3])
                    output_fp = os.path.join(
                        log_dir, app_id, app_id + '_deepmerged_data_' +
                        start_date + '_' + end_date + '.json.gz')
                    print(
                        'Merge, unique, sort, and zip files of all LastConfigurationEditDate to: {}'
                        .format(output_fp))
                    if not os.path.isfile(
                            output_fp
                    ) or __name__ == '__main__' and input(
                            'Output file already exits. Do you want to overwrite [Y/n]? '
                            .format(output_fp)) in {'Y', 'y'}:
                        # event id -> (timestamp, cost, raw line)
                        d = {}
                        for fn in selected_fps:
                            print('Parsing: {}'.format(fn), end='', flush=True)
                            if not dry_run:
                                # Context manager so each input file is closed
                                # as soon as it has been parsed.
                                with open(fn, 'rb') as f_in:
                                    for x in f_in:
                                        if x.startswith(
                                                b'{"_label_cost'
                                        ) and x.strip().endswith(
                                                b'}'):  # reading only cooked lined
                                            data = ds_parse.json_cooked(x)
                                            if data is not None and (
                                                    data['ei'] not in d
                                                    or float(data['cost']) <
                                                    d[data['ei']][1]
                                            ):  # taking line with best reward
                                                d[data['ei']] = (data['ts'],
                                                                 float(
                                                                     data['cost']),
                                                                 x)
                            print(' - len(d): {}'.format(len(d)))

                        print('Writing to output .gz file...')
                        if dry_run:
                            print('--dry_run - Not downloading!')
                        else:
                            with gzip.open(output_fp, 'wb') as f:
                                i = 0
                                for x in sorted(d.values(), key=lambda x: x[
                                        0]):  # events are sorted by timestamp
                                    f.write(x[2])
                                    i += 1
                                    if i % 5000 == 0:
                                        update_progress(i, len(d))
                                update_progress(i, len(d))
                                print()
                else:
                    print(
                        'Unrecognized --create_gzip_mode: {}, skipping creating gzip files.'
                        .format(create_gzip_mode))
            else:
                print('No file downloaded, skipping creating gzip files.')

    print('Total elapsed time: {:.1f} sec.\n'.format(time.time() - t_start))
    return output_fp
import uuid

today = date.today()

# Serialise each data frame to CSV text, paired with its target blob name.
coronaFileName = "WorldWideCovidNinty.csv"
AllCountryCoronainfoCSV = df.to_csv(index_label="idx", encoding="utf-8")

BdCoronaInfo = "BagnadeshCovidNinty.csv"
BdCoronaInfoCsv = BdDataFrame.to_csv(index_label="idx", encoding="utf-8")

TotalCoronaInfo = "TotalCovidNinty.csv"
TotalCoronaInfoCsv = TotalCoronaDataFrame.to_csv(index_label="idx", encoding="utf-8")

#!pip install azure-storage-blob==0.37.1
#block_blob_service.create_container('mycontainer')
from azure.storage.blob import BlockBlobService
from azure.storage.blob import ContentSettings

block_blob_service = BlockBlobService(account_name='', account_key='')

# Push the three CSV documents to the 'mycontainer' container, in order.
for _blob_name, _csv_text in (
        (coronaFileName, AllCountryCoronainfoCSV),
        (BdCoronaInfo, BdCoronaInfoCsv),
        (TotalCoronaInfo, TotalCoronaInfoCsv),
):
    block_blob_service.create_blob_from_text('mycontainer', _blob_name, _csv_text)

print("Successfully end covitNinty Web Scraping...........")
def run_sample(account_name, account_key, container_name):
    """Run an upload / list / download round trip against Azure Blob storage.

    Creates the container, makes it public, uploads a small text file with
    metadata, lists the blobs, downloads the blob again, waits for the user
    to press a key and then deletes the blob and both local files. Any error
    is printed rather than raised.

    :param account_name: storage account name.
    :param account_key: storage account key.
    :param container_name: container to create and use.
    """
    try:
        # Create the BlockBlockService that is used to call the Blob service for the storage account
        block_blob_service = BlockBlobService(
            account_name=account_name, account_key=account_key)

        # Create a container
        # Nothing happens if it already exists.
        block_blob_service.create_container(container_name)

        # Set the permission so the blobs are public.
        block_blob_service.set_container_acl(container_name, public_access=PublicAccess.Container)

        # Create a file in the current folder to test the upload and download.
        #local_path=os.path.expanduser("~/Documents")
        local_path = os.getcwd()
        local_file_name = "QuickStart_" + str(uuid.uuid4()) + ".txt"
        full_path_to_file = os.path.join(local_path, local_file_name)

        # Write text to the file.
        # Context manager so the handle is closed even if the write fails.
        with open(full_path_to_file, 'w') as temp_file:
            temp_file.write("Hello, World!")

        print("Temp file = " + full_path_to_file)
        print("\nUploading to Blob storage as blob " + local_file_name)

        # Metadata to attach to the blob being uploaded.
        # Values may contain whitespace, but a key containing whitespace
        # causes an error when the upload method is executed.
        # The same applies when editing blob metadata in the portal.
        metadata = {'one': 'first first', 'two': 'second', 'three': 'third'}

        # Upload the created file, use local_file_name for the blob name
        block_blob_service.create_blob_from_path(container_name, local_file_name, full_path_to_file, metadata=metadata)

        # List the blobs in the container
        # Blob metadata is not returned by default, so it has to be requested
        # through the include keyword argument.
        print("\nList blobs in the container")
        generator = block_blob_service.list_blobs(container_name, include=Include.METADATA)
        for blob in generator:
            # blob is an azure.storage.blob.models.Blob object.
            # blob.properties is an azure.storage.blob.models.BlobProperties object.
            print("\t Blob name: " + blob.name)
            print("\t Blob type: " + blob.properties.blob_type)
            print("\t Blob content length: " + str(blob.properties.content_length))
            print("\t Last modified: " + str(blob.properties.last_modified))

            # Had the include keyword argument of list_blobs not been given,
            # get_blob_metadata would have to be called separately to obtain
            # the metadata.
            # metadata = block_blob_service.get_blob_metadata(container_name, blob.name)
            metadata = blob.metadata
            if metadata:
                print("\t Metadata:")
                for key, value in metadata.items():
                    print("\t\t key = " + key + ", value = " + value)
            print()

        # Download the blob(s).
        # Add '_DOWNLOADED' as prefix to '.txt' so you can see both files in the current folder.
        full_path_to_file2 = os.path.join(local_path, local_file_name.replace('.txt', '_DOWNLOADED.txt'))
        print("\nDownloading blob to " + full_path_to_file2)
        block_blob_service.get_blob_to_path(container_name, local_file_name, full_path_to_file2)

        sys.stdout.write("Sample finished running. When you hit <any key>, the sample will be deleted and the sample "
                         "application will exit.")
        sys.stdout.flush()
        input()

        # Clean up resources. This includes the uploaded blob and the temp files but not the container for safety.
        #block_blob_service.delete_container(container_name)
        block_blob_service.delete_blob(container_name, local_file_name)
        os.remove(full_path_to_file)
        os.remove(full_path_to_file2)
    except Exception as e:
        print(e)