def create(self, name, blob_name, label=None, container_name=None):
    if not container_name:
        container_name = self.account.storage_container()
    if not label:
        label = name
    try:
        storage = BaseBlobService(
            self.account.storage_name(),
            self.account.storage_key(),
            endpoint_suffix=self.account.get_blob_service_host_base()
        )
        storage.get_blob_properties(
            container_name, blob_name
        )
    except Exception:
        raise AzureBlobServicePropertyError(
            '%s not found in container %s' % (blob_name, container_name)
        )
    try:
        media_link = storage.make_blob_url(container_name, blob_name)
        result = self.service.add_os_image(
            label, media_link, name, 'Linux'
        )
        return Defaults.unify_id(result.request_id)
    except Exception as e:
        raise AzureOsImageCreateError(
            '%s: %s' % (type(e).__name__, format(e))
        )
def exists(self, container):
    blob_service = BaseBlobService(
        self.account_name,
        self.account_key,
        endpoint_suffix=self.blob_service_host_base
    )
    try:
        blob_service.get_container_properties(container)
        return True
    except Exception:
        return False
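# Hedged usage sketch for the exists() check above, assuming it is a method on
# a small wrapper object exposing account_name, account_key and
# blob_service_host_base (the attributes the snippet itself reads). The
# ensure_container() helper and the "client" variable are illustrative
# assumptions, not part of the original code.
from azure.storage.blob import BaseBlobService

def ensure_container(client, container):
    # Only create the container when exists() reports it is missing.
    if not client.exists(container):
        blob_service = BaseBlobService(
            client.account_name,
            client.account_key,
            endpoint_suffix=client.blob_service_host_base
        )
        # create_container() is part of the legacy 2.x BaseBlobService API.
        blob_service.create_container(container)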
def get_sas_url(name):
    baseblobservice = BaseBlobService(account_name='', account_key='')
    # Note: SAS start/expiry values are interpreted as UTC by the service;
    # the two-hour backdated start compensates for clock skew.
    sasToken = baseblobservice.generate_blob_shared_access_signature(
        container_name='images',
        blob_name=name,
        permission=BlobPermissions(read=True, write=True, create=True),
        start=datetime.now() - timedelta(hours=2),
        expiry=datetime.now() + timedelta(hours=2)
    )
    sasurl = baseblobservice.make_blob_url('images', name, sas_token=sasToken)
    return sasurl
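# Hedged follow-up to get_sas_url() above: the returned URL already carries the
# SAS token as a query string, so the blob can be fetched over plain HTTPS.
# The blob name, local file name and use of requests are assumptions for
# illustration only.
import requests

url = get_sas_url('example.vhd')
response = requests.get(url, stream=True)
response.raise_for_status()
with open('example.vhd', 'wb') as handle:
    for chunk in response.iter_content(chunk_size=4 * 1024 * 1024):
        handle.write(chunk)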
def list(self):
    result = []
    blob_service = BaseBlobService(
        self.account_name,
        self.account_key,
        endpoint_suffix=self.blob_service_host_base
    )
    try:
        for container in blob_service.list_containers():
            result.append(format(container.name))
    except Exception as e:
        raise AzureContainerListError(
            '%s: %s' % (type(e).__name__, format(e))
        )
    return result
def content(self, container):
    result = {container: []}
    blob_service = BaseBlobService(
        self.account_name,
        self.account_key,
        endpoint_suffix=self.blob_service_host_base
    )
    try:
        for blob in blob_service.list_blobs(container):
            result[container].append(format(blob.name))
        return result
    except Exception as e:
        raise AzureContainerListContentError(
            '%s: %s' % (type(e).__name__, format(e))
        )
def delete(self, container):
    blob_service = BaseBlobService(
        self.account_name,
        self.account_key,
        endpoint_suffix=self.blob_service_host_base
    )
    try:
        blob_service.delete_container(
            container_name=container, fail_not_exist=True
        )
    except Exception as e:
        raise AzureContainerDeleteError(
            '%s: %s' % (type(e).__name__, format(e))
        )
    return True
def __init__(self, **storage_config):
    account_key = storage_config[CONFIG_STORAGE_KEY]
    account_name = storage_config[CONFIG_STORAGE_NAME]
    container_name = storage_config[CONFIG_STORAGE_CONTAINER_NAME]
    public_url = storage_config[CONFIG_STORAGE_EXTERNAL_URL]
    self.services = {
        'base_blob': BaseBlobService(account_name=account_name, account_key=account_key),
        'block_blob': BlockBlobService(account_name=account_name, account_key=account_key),
        'append_blob': AppendBlobService(account_name=account_name, account_key=account_key),
    }
    if not container_name:
        raise ValidationError("You must set which container you want to use")
    self.container_name = container_name
    if not public_url:
        raise ValidationError("You must set public container url")
    self.public_url = public_url
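# Hedged sketch of how the three services registered in self.services above
# could be exercised; "storage" stands for an instance of the surrounding
# class, and the blob names and payloads are placeholders.
storage.services['block_blob'].create_blob_from_path(
    storage.container_name, 'report.csv', '/tmp/report.csv')
# Append blobs must exist before text can be appended to them.
storage.services['append_blob'].create_blob(storage.container_name, 'events.log')
storage.services['append_blob'].append_blob_from_text(
    storage.container_name, 'events.log', 'started\n')
url = storage.services['base_blob'].make_blob_url(storage.container_name, 'report.csv')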
def __generate_access_key(permission, expiry, storage_config):
    try:
        base_blob_service = BaseBlobService(
            account_name=storage_config['account_name'],
            account_key=storage_config['account_key'])
        sas = base_blob_service.generate_blob_shared_access_signature(
            storage_config['container'],
            storage_config['blob'],
            permission,
            expiry)
        return {
            'sas': sas,
            'base_uri': storage_config['base_uri'],
            'container': storage_config['container']}
    except Exception:
        raise AzureAccountSASGenerateError()
def setUpClass(cls):
    cls.generate_resource_files()
    cls._test_connection_string = os.environ['test_connection_string']
    assert cls._test_connection_string
    cls._blob_service = BaseBlobService(connection_string=cls._test_connection_string)
    assert cls._blob_service
    cls._test_account_key = cls._blob_service.account_key
class AzureBlobStorageClient():
    def __init__(self, storage_config):
        self.service = BaseBlobService(
            account_name=storage_config['account_name'],
            account_key=storage_config['account_key'])

    def list(self, container, prefix):
        blobs = list(self.service.list_blobs(container, prefix=prefix))
        return blobs

    @staticmethod
    def generate_read_access_key(storage_config):
        permission = AccountPermissions.READ
        expiry = datetime.datetime.utcnow() + datetime.timedelta(hours=1)
        return AzureBlobStorageClient.__generate_access_key(permission, expiry, storage_config)

    @staticmethod
    def generate_write_access_key(storage_config):
        permission = BlobPermissions(read=True, write=True, add=True, create=True)
        expiry = datetime.datetime.utcnow() + datetime.timedelta(hours=1)
        return AzureBlobStorageClient.__generate_access_key(permission, expiry, storage_config)

    @staticmethod
    def __generate_access_key(permission, expiry, storage_config):
        try:
            base_blob_service = BaseBlobService(
                account_name=storage_config['account_name'],
                account_key=storage_config['account_key'])
            sas = base_blob_service.generate_blob_shared_access_signature(
                storage_config['container'],
                storage_config['blob'],
                permission,
                expiry)
            return {
                'sas': sas,
                'base_uri': storage_config['base_uri'],
                'container': storage_config['container']}
        except Exception:
            raise AzureAccountSASGenerateError()
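# Hedged sketch of consuming the dict returned by the access-key helpers in
# AzureBlobStorageClient above: base_uri and container are echoed back from
# storage_config, so a read URL can be assembled by appending the SAS token.
# All literal values below are placeholders.
storage_config = {
    'account_name': 'mystorageaccount',
    'account_key': '<key>',
    'container': 'backups',
    'blob': 'db.dump',
    'base_uri': 'https://mystorageaccount.blob.core.windows.net',
}
access = AzureBlobStorageClient.generate_read_access_key(storage_config)
blob_url = '%s/%s/%s?%s' % (
    access['base_uri'], access['container'], storage_config['blob'], access['sas'])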
def base_blob_service(self):
    if self._base_blob_service is None:
        self._base_blob_service = BaseBlobService(self.account_name, self.account_key)
    return self._base_blob_service
def get_blob_data(self, data: AzureBlobAccessData) -> str:
    service = BaseBlobService(
        account_name=data.account_name,
        sas_token=data.sas,
        custom_domain=data.domain)
    # Bind the download to a new name instead of shadowing the `data` argument.
    blob = service.get_blob_to_text(data.container_name, data.blob_name)
    return blob.content
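# Hedged usage sketch for get_blob_data() above. AzureBlobAccessData only
# appears as a type hint in the snippet; the dataclass below is an assumed
# shape built from the attributes the method actually reads (account_name,
# sas, domain, container_name, blob_name).
from dataclasses import dataclass
from typing import Optional

@dataclass
class AzureBlobAccessData:
    account_name: str
    sas: str
    domain: Optional[str]  # custom domain for the blob endpoint, if any
    container_name: str
    blob_name: str

access = AzureBlobAccessData(
    account_name='mystorageaccount',
    sas='<sas token>',
    domain=None,
    container_name='configs',
    blob_name='settings.json',
)
# text = reader.get_blob_data(access)  # "reader" is whatever object defines get_blob_data()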
def rules2():
    # in rules page (metadata3.html)
    if request.method == "POST":
        details = request.form
        session['account_name'] = details['account_name']
        session['account_key'] = details['account_key']
        session['ContainerName'] = details['ContainerName']
        session['Blob Name'] = details['Blob Name']
        session['azure file format'] = details['azure file format']
        session['azure file delimiter'] = details['azure file delimiter']
        azureblob_parameter_dictionary = {
            "StorageAccountAccessKey": session['account_key'],
            "StorageAccountName": session['account_name'],
            "ContainerName": session['ContainerName'],
            "Path": session['Blob Name'],
            "Format": session['azure file format'],
            "Delimiter": session['azure file delimiter']
        }
        session['azureblob_parameter_dictionary_string'] = str(azureblob_parameter_dictionary)
        print(session['azureblob_parameter_dictionary_string'])
        account_name = session['account_name']
        account_key = session['account_key']
        container_name = session['ContainerName']
        blob_name = session['Blob Name']
        url = f"https://{account_name}.blob.core.windows.net/{container_name}/{blob_name}"
        service = BaseBlobService(account_name=account_name, account_key=account_key)
        token = service.generate_blob_shared_access_signature(container_name, blob_name, permission=BlobPermissions.READ, expiry=datetime.utcnow() + timedelta(hours=1))
        session1 = requests.Session()
        response = session1.get(f"{url}?{token}", stream=True)
        a = session['azure file delimiter']  # CSV delimiter chosen by the user
        with closing(response) as r:
            reader = csv.reader(codecs.iterdecode(r.iter_lines(), 'latin-1'), delimiter=a, quotechar='"', quoting=csv.QUOTE_MINIMAL)
            lst = []
            a = []
            for row in islice(reader, 0, 5):
                for cell in row:
                    y = conv2(cell)
                    a.append(y)
                lst.append(a)
                a = []
        df = pd.DataFrame(lst[1:], columns=lst[0])
        lookup_data = list(df.columns)
        mydb = mysql.connector.connect(host="demetadata.mysql.database.azure.com", user="******", passwd="Tredence@123", database="deaccelator")
        cursor = mydb.cursor()
        # NOTE: string interpolation builds this query; a parameterized query would be safer.
        cursor.execute("SELECT concat(ColumnName,'`~`',Description) as x FROM metadata WHERE EntryID = %s ;" % (session['EntryID']))
        data = cursor.fetchall()
        data1 = list(data)
        cursor.execute("SELECT Name as x FROM centralrulerepo")
        data2 = cursor.fetchall()
        data3 = list(data2)
        out = [item for t in data1 for item in t]
        out1 = [item for t in data3 for item in t]
        mydb.commit()
        cursor.close()
        value = session['file exists']
        account = session['username']
        if session['rule exists'] == "Yes":
            msg = "Rule Name already Taken!"
        elif session['rule exists'] == "Inserted":
            msg = "New Rule " + session['RuleName'] + " Added"
        else:  # covers "No" and keeps msg defined for any other value
            msg = ""
        return render_template("metadata3.html", data3=lookup_data, data=out, data1=out1, account=account, value=value, msg=msg)
def index():
    # for flat file and mysql metadata
    SourceType = session['source location type']
    if SourceType == 'MySql':
        mydb = mysql.connector.connect(host=session['hostname'], user=session['user'], passwd=session['password'], database=session['database name'])
        cursor = mydb.cursor()
        cursor.execute("DROP VIEW IF EXISTS temp")
        cursor.execute("CREATE VIEW temp AS " + session['source query'] + " LIMIT 1 ")
        cursor.execute("DESCRIBE temp")
        data = cursor.fetchall()
        df = pd.DataFrame(data, columns=['Column Name', 'Data Type', 'Nullable', 'Primary Key', 'Default', 'Description'])
        df = df.assign(ColumnNumber=[i + 1 for i in range(len(df))])[['ColumnNumber'] + df.columns.tolist()]
        df4 = df.drop(['Primary Key', 'Nullable', 'Description', 'Default'], axis=1)
        df4['Column Number'] = df4['ColumnNumber']
        df5 = df4.drop(['ColumnNumber'], axis=1)
        df5['Description'] = df5["Column Name"].map(lambda row: Business_Description(row))
        cursor.execute(session['source query'] + " LIMIT 100 ")
        data2 = cursor.fetchall()
        df_temp = pd.DataFrame(data2, columns=df5["Column Name"].tolist())
        adict = {}
        for col in df_temp.columns.tolist():
            df_temp[col] = df_temp[col].map(lambda ele: pii(ele))
            x = df_temp[col].value_counts().idxmax()
            adict[col] = x
        df5["PII Type"] = df5["Column Name"].map(lambda ele: adict[ele])
        df5["PII"] = df5["PII Type"].map(lambda ele: "No" if ele == "None" else "Yes")
        columns_order = ['Column Number', 'Column Name', 'Data Type', 'Description', "PII", "PII Type"]
        df6 = df5.reindex(columns=columns_order)
        cursor.execute(" DROP VIEW temp ")
        mydb.commit()
        cursor.close()
        value = session['file exists']
        account = session['username']
        return render_template("metadata.html", column_names=df6.columns.values, row_data=list(df6.values.tolist()), zip=zip, value=value, account=account)
    elif SourceType == 'Google Drive':
        URL = 'https://docs.google.com/uc?export=download'
        session1 = requests.Session()
        file_id = session['file_id']
        response = session1.get(URL, params={'id': file_id}, stream=True)
        token = get_confirm_token(response)
        if token:
            params = {'id': file_id, 'confirm': token}
            response = session1.get(URL, params=params, stream=True)
        a = session['delimiter']
        with closing(response) as r:
            reader = csv.reader(codecs.iterdecode(r.iter_lines(), 'utf-8'), delimiter=a, quotechar='"', quoting=csv.QUOTE_MINIMAL)
            lst = []
            a = []
            for row in islice(reader, 0, 10):
                for cell in row:
                    y = conv2(cell)
                    a.append(y)
                lst.append(a)
                a = []
        df = pd.DataFrame(lst[1:], columns=lst[0])
        df2 = pd.DataFrame(df.dtypes, index=None, columns='data_type'.split())
        df3 = pd.DataFrame(df.columns, columns=['Column Name'])
        df4 = df2.replace(['int64', 'float64', 'datetime64[ns]', 'object'], ['int', 'float', 'datetime', 'string'])
        df4.index = df3.index
        df3['Data Type'] = df4['data_type']
        df3 = df3.assign(ColumnNumber=[i + 1 for i in range(len(df3))])[['ColumnNumber'] + df3.columns.tolist()]
        df3['Column Number'] = df3['ColumnNumber']
        df4 = df3.drop(['ColumnNumber'], axis=1)
        df4['Description'] = df4["Column Name"].map(lambda row: Business_Description(row))
        adict = {}
        for col in df.columns.tolist():
            df[col] = df[col].map(lambda ele: pii(ele))
            x = df[col].value_counts().idxmax()
            adict[col] = x
        df4["PII Type"] = df4["Column Name"].map(lambda ele: adict[ele])
        df4["PII"] = df4["PII Type"].map(lambda ele: "No" if ele == "None" else "Yes")
        columns_order = ['Column Number', 'Column Name', 'Data Type', 'Description', "PII", "PII Type"]
        df5 = df4.reindex(columns=columns_order)
        value = session['file exists']
        account = session['username']
        return render_template("metadata.html", column_names=df5.columns.values, row_data=list(df5.values.tolist()), zip=zip, value=value, account=account)
    elif SourceType == 'AzureBlob':
        account_name = session['account_name']
        account_key = session['account_key']
        container_name = session['ContainerName']
        blob_name = session['Blob Name']
        url = f"https://{account_name}.blob.core.windows.net/{container_name}/{blob_name}"
        service = BaseBlobService(account_name=account_name, account_key=account_key)
        token = service.generate_blob_shared_access_signature(container_name, blob_name, permission=BlobPermissions.READ, expiry=datetime.utcnow() + timedelta(hours=1))
        session1 = requests.Session()
        response = session1.get(f"{url}?{token}", stream=True)
        a = session['azure file delimiter']  # CSV delimiter chosen by the user
        with closing(response) as r:
            reader = csv.reader(codecs.iterdecode(r.iter_lines(), 'latin-1'), delimiter=a, quotechar='"', quoting=csv.QUOTE_MINIMAL)
            lst = []
            a = []
            for row in islice(reader, 0, 5):
                for cell in row:
                    y = conv2(cell)
                    a.append(y)
                lst.append(a)
                a = []
        df = pd.DataFrame(lst[1:], columns=lst[0])
        df2 = pd.DataFrame(df.dtypes, index=None, columns='data_type'.split())
        df3 = pd.DataFrame(df.columns, columns=['Column Name'])
        df4 = df2.replace(['int64', 'float64', 'datetime64[ns]', 'object'], ['int', 'float', 'datetime', 'string'])
        df4.index = df3.index
        df3['Data Type'] = df4['data_type']
        df3 = df3.assign(ColumnNumber=[i + 1 for i in range(len(df3))])[['ColumnNumber'] + df3.columns.tolist()]
        df3['Column Number'] = df3['ColumnNumber']
        df4 = df3.drop(['ColumnNumber'], axis=1)
        df4['Description'] = df4["Column Name"].map(lambda row: Business_Description(row))
        adict = {}
        for col in df.columns.tolist():
            df[col] = df[col].map(lambda ele: pii(ele))
            x = df[col].value_counts().idxmax()
            adict[col] = x
        df4["PII Type"] = df4["Column Name"].map(lambda ele: adict[ele])
        df4["PII"] = df4["PII Type"].map(lambda ele: "No" if ele == "None" else "Yes")
        columns_order = ['Column Number', 'Column Name', 'Data Type', 'Description', "PII", "PII Type"]
        df5 = df4.reindex(columns=columns_order)
        value = session['file exists']
        account = session['username']
        return render_template("metadata.html", column_names=df5.columns.values, row_data=list(df5.values.tolist()), zip=zip, value=value, account=account)
    elif SourceType == 'One Drive':
        # session['onedrive link'], session['Delimiter of onedrive']
        session1 = requests.Session()
        dwn_url = session['onedrive link']
        response = session1.get(dwn_url, stream=True)
        token = get_confirm_token(response)
        if token:
            params = {'confirm': token}
            response = session1.get(dwn_url, params=params, stream=True)
        a = session['Delimiter of onedrive']
        with closing(response) as r:
            reader = csv.reader(codecs.iterdecode(r.iter_lines(), 'utf-8'), delimiter=a, quotechar='"', quoting=csv.QUOTE_MINIMAL)
            lst = []
            a = []
            for row in islice(reader, 0, 10):
                for cell in row:
                    y = conv2(cell)
                    a.append(y)
                lst.append(a)
                a = []
        df = pd.DataFrame(lst[1:], columns=lst[0])
        df2 = pd.DataFrame(df.dtypes, index=None, columns='data_type'.split())
        df3 = pd.DataFrame(df.columns, columns=['Column Name'])
        df4 = df2.replace(['int64', 'float64', 'datetime64[ns]', 'object'], ['int', 'float', 'datetime', 'string'])
        df4.index = df3.index
        df3['Data Type'] = df4['data_type']
        df3 = df3.assign(ColumnNumber=[i + 1 for i in range(len(df3))])[['ColumnNumber'] + df3.columns.tolist()]
        df3['Column Number'] = df3['ColumnNumber']
        df4 = df3.drop(['ColumnNumber'], axis=1)
        df4['Description'] = df4["Column Name"].map(lambda row: Business_Description(row))
        adict = {}
        for col in df.columns.tolist():
            df[col] = df[col].map(lambda ele: pii(ele))
            x = df[col].value_counts().idxmax()
            adict[col] = x
        df4["PII Type"] = df4["Column Name"].map(lambda ele: adict[ele])
        df4["PII"] = df4["PII Type"].map(lambda ele: "No" if ele == "None" else "Yes")
        columns_order = ['Column Number', 'Column Name', 'Data Type', 'Description', "PII", "PII Type"]
        df5 = df4.reindex(columns=columns_order)
        value = session['file exists']
        account = session['username']
        return render_template("metadata.html", column_names=df5.columns.values, row_data=list(df5.values.tolist()), zip=zip, value=value, account=account)
def __init__(self, storage_config):
    self.service = BaseBlobService(
        account_name=storage_config['account_name'],
        account_key=storage_config['account_key'])
def list_blobs(storage_share, delta=1, prefix='', report_file='/tmp/filelist_report.txt', request='storagestats'):
    """Contact Azure endpoint using "list_blobs" method.

    Contacts an Azure blob and uses the "list_blobs" API to recursively obtain
    all the objects in a container and sum their size to obtain total space
    usage.

    Attributes:
    storage_share -- dynafed_storagestats StorageShare object.

    """
    _total_bytes = 0
    _total_files = 0

    _base_blob_service = BaseBlobService(
        account_name=storage_share.uri['account'],
        account_key=storage_share.plugin_settings['azure.key'],
        # Set to true if using Azurite storage emulator for testing.
        is_emulated=False
    )
    _container_name = storage_share.uri['container']
    _next_marker = None
    _timeout = int(storage_share.plugin_settings['conn_timeout'])

    _logger.debug(
        "[%s]Requesting storage stats with: URN: %s API Method: %s Account: %s Container: %s",
        storage_share.id,
        storage_share.uri['url'],
        storage_share.plugin_settings['storagestats.api'].lower(),
        storage_share.uri['account'],
        storage_share.uri['container']
    )

    while True:
        try:
            _blobs = _base_blob_service.list_blobs(
                _container_name,
                marker=_next_marker,
                timeout=_timeout,
                prefix=prefix,
            )

        except azure.common.AzureMissingResourceHttpError as ERR:
            raise dynafed_storagestats.exceptions.ErrorAzureContainerNotFound(
                error='ContainerNotFound',
                status_code="404",
                debug=str(ERR),
                container=_container_name,
            )

        except azure.common.AzureHttpError as ERR:
            raise dynafed_storagestats.exceptions.ConnectionErrorAzureAPI(
                error='ConnectionError',
                status_code="400",
                debug=str(ERR),
                api=storage_share.plugin_settings['storagestats.api'],
            )

        except azure.common.AzureException as ERR:
            raise dynafed_storagestats.exceptions.ConnectionError(
                error='ConnectionError',
                status_code="400",
                debug=str(ERR),
            )

        else:
            # Check what type of request is being served.
            if request == 'storagestats':
                try:
                    # Make sure we got a list of objects.
                    _blobs.items
                except AttributeError:
                    storage_share.stats['bytesused'] = 0
                    break
                else:
                    try:
                        for _blob in _blobs:
                            _total_bytes += int(_blob.properties.content_length)
                            _total_files += 1
                    # Investigate
                    except azure.common.AzureHttpError:
                        pass

            elif request == 'filelist':
                try:
                    # Make sure we got a list of objects.
                    _blobs.items
                except AttributeError:
                    break
                else:
                    for _blob in _blobs:
                        # Output files older than the specified delta.
                        if dynafed_storagestats.time.mask_timestamp_by_delta(_blob.properties.last_modified, delta):
                            report_file.write("%s\n" % _blob.name)
                            _total_files += 1

        # Exit when the response carries no "NextMarker", as the listing is complete.
        if _blobs.next_marker:
            _next_marker = _blobs.next_marker
        else:
            break

    # Save time when data was obtained.
    storage_share.stats['endtime'] = int(datetime.datetime.now().timestamp())

    # Process the result for the storage stats.
    if request == 'storagestats':
        storage_share.stats['bytesused'] = int(_total_bytes)

        # Obtain or set default quota and calculate freespace.
        if storage_share.plugin_settings['storagestats.quota'] == 'api':
            storage_share.stats['quota'] = dynafed_storagestats.helpers.convert_size_to_bytes("1TB")
            storage_share.stats['filecount'] = _total_files
            storage_share.stats['bytesfree'] = storage_share.stats['quota'] - storage_share.stats['bytesused']
            raise dynafed_storagestats.exceptions.QuotaWarning(
                error="NoQuotaGiven",
                status_code="098",
                default_quota=storage_share.stats['quota'],
            )

        else:
            storage_share.stats['quota'] = int(storage_share.plugin_settings['storagestats.quota'])
            storage_share.stats['filecount'] = _total_files
            storage_share.stats['bytesfree'] = storage_share.stats['quota'] - storage_share.stats['bytesused']
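# Minimal, hedged sketch of the same space-usage accounting that list_blobs()
# above performs, outside the dynafed_storagestats machinery. In the legacy
# 2.x SDK, list_blobs() returns a generator that follows continuation markers
# on its own, so one loop walks the whole container. Account, key and
# container names are placeholders.
from azure.storage.blob import BaseBlobService

service = BaseBlobService(account_name='mystorageaccount', account_key='<key>')
total_bytes = 0
total_files = 0
for blob in service.list_blobs('mycontainer', prefix=''):
    total_bytes += int(blob.properties.content_length)
    total_files += 1
print(total_files, total_bytes)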
cfg = docker.Config(in_config_dir)
# loads application parameters - user defined
parameters = cfg.get_parameters()
account_key = parameters.get('account_key')
account_name = parameters.get('account_name')
data_container = parameters.get('data_container')
config_container = parameters.get('config_container')
date_col = parameters.get('date_col')
# when date_col is not in params, set to default value
if not date_col:
    date_col = date_col_default

block_blob_service = BlockBlobService(account_name=account_name, account_key=account_key)
base_blob_service = BaseBlobService(account_name=account_name, account_key=account_key)
print(f'Docker container will try to connect to {account_name} account of BlockBlobService...')


def download_config(base_blob_service, config_container, table_name):
    config_name = table_name + config_suffix
    config_path = out_data_dir + config_name
    try:
        base_blob_service.get_blob_to_path(config_container, config_name, config_path)
    except Exception:
        # No config blob yet: start from a fresh config with an epoch date.
        brand_new_config = {}
        brand_new_config['latest'] = '19700101'
        with open(config_path, 'w') as outfile:
def create_instance(
    self, cloud_service_name, disk_name, system_config,
    network_config=None, label=None, group='production',
    machine_size='Small', reserved_ip_name=None
):
    """
        create a virtual disk image instance
    """
    if not self.__storage_reachable_by_cloud_service(cloud_service_name):
        message = [
            'The cloud service "%s" and the storage account "%s"',
            'are not in the same region, cannot launch an instance.'
        ]
        raise AzureStorageNotReachableByCloudServiceError(
            ' '.join(message) % (
                cloud_service_name, self.account.storage_name()
            )
        )
    if not self.__image_reachable_by_cloud_service(cloud_service_name, disk_name):
        message = [
            'The selected image "%s" is not available',
            'in the region of the selected cloud service "%s",',
            'cannot launch instance'
        ]
        raise AzureImageNotReachableByCloudServiceError(
            ' '.join(message) % (disk_name, cloud_service_name)
        )
    deployment_exists = self.__get_deployment(cloud_service_name)
    if label and deployment_exists:
        message = [
            'A deployment of the name: %s already exists.',
            'Assignment of a label can only happen for the',
            'initial deployment.'
        ]
        raise AzureVmCreateError(' '.join(message) % cloud_service_name)
    if reserved_ip_name and deployment_exists:
        message = [
            'A deployment of the name: %s already exists.',
            'Assignment of a reserved IP name can only happen for the',
            'initial deployment.'
        ]
        raise AzureVmCreateError(' '.join(message) % cloud_service_name)
    storage = BaseBlobService(
        self.account.storage_name(),
        self.account.storage_key(),
        endpoint_suffix=self.account.get_blob_service_host_base()
    )
    media_link = storage.make_blob_url(
        self.account.storage_container(),
        ''.join(
            [
                cloud_service_name,
                '_instance_', system_config.host_name,
                '_image_', disk_name
            ]
        )
    )
    instance_disk = OSVirtualHardDisk(disk_name, media_link)
    instance_record = {
        'deployment_name': cloud_service_name,
        'network_config': network_config,
        'role_name': system_config.host_name,
        'role_size': machine_size,
        'service_name': cloud_service_name,
        'system_config': system_config,
        'os_virtual_hard_disk': instance_disk,
        'provision_guest_agent': True
    }
    if network_config:
        instance_record['network_config'] = network_config
    try:
        if deployment_exists:
            result = self.service.add_role(**instance_record)
        else:
            instance_record['deployment_slot'] = group
            if reserved_ip_name:
                instance_record['reserved_ip_name'] = reserved_ip_name
            if label:
                instance_record['label'] = label
            else:
                instance_record['label'] = cloud_service_name
            result = self.service.create_virtual_machine_deployment(**instance_record)
        return {
            'request_id': format(result.request_id),
            'cloud_service_name': cloud_service_name,
            'instance_name': system_config.host_name
        }
    except Exception as e:
        raise AzureVmCreateError(
            '%s: %s' % (type(e).__name__, format(e))
        )