def sync_excel_blobs_and_az_tables():
    table_service = TableService(account_name=Config.TABLE_ACCOUNT_NAME,
                                 account_key=Config.TABLE_KEY)
    if not table_service.exists(Config.PRODUCT_TABLE_NAME):
        create_table(table_service, Config.PRODUCT_TABLE_NAME)
    # TODO: Consider insert_or_merge_entity() here instead
    # https://docs.microsoft.com/en-us/python/api/azure-cosmosdb-table/azure.cosmosdb.table.tableservice.tableservice?view=azure-python
    delete_all_entries_in_table(table_service, Config.PRODUCT_TABLE_NAME)

    batch = TableBatch()
    table_data = get_metadata_blob_data()
    products_data = get_product_blobs_data()
    for p in table_data:
        try:
            p["cumulative"] = str(products_data[p["id"]]["cumulative"])
        except KeyError:
            p["cumulative"] = None
        batch.insert_entity(p)
    try:
        table_service.commit_batch(table_name=Config.PRODUCT_TABLE_NAME,
                                   batch=batch)
        print(
            f"Uploaded products data to '{Config.PRODUCT_TABLE_NAME}' table "
            f"in '{Config.TABLE_ACCOUNT_NAME}' storage account"
        )
    except HeaderParsingError as e:
        print(e)
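The helpers create_table, delete_all_entries_in_table, get_metadata_blob_data and get_product_blobs_data are defined elsewhere. A minimal sketch of what delete_all_entries_in_table likely has to do, assuming it clears the table with batched deletes (which must share a partition key and cap out at 100 operations per batch):

def delete_all_entries_in_table(table_service, table_name):
    # Hypothetical helper: delete every entity, batching per partition key,
    # since a TableBatch only accepts operations on a single partition
    # and at most 100 of them.
    entities = table_service.query_entities(table_name,
                                            select="PartitionKey,RowKey")
    by_partition = {}
    for entity in entities:
        by_partition.setdefault(entity.PartitionKey, []).append(entity.RowKey)
    for partition_key, row_keys in by_partition.items():
        for pos in range(0, len(row_keys), 100):
            batch = TableBatch()
            for row_key in row_keys[pos:pos + 100]:
                batch.delete_entity(partition_key, row_key)
            table_service.commit_batch(table_name, batch)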
def migrate_task_os(table_service: TableService) -> None:
    table_name = "Task"
    tasks = table_service.query_entities(
        table_name, select="PartitionKey,RowKey,os,config")
    partition_key = None
    count = 0
    batch = TableBatch()
    for task in tasks:
        # Batched operations must share a partition key, so commit and start
        # a new batch whenever the partition changes (skipping the initial
        # empty batch, which the original committed).
        if partition_key != task.PartitionKey:
            if partition_key is not None:
                table_service.commit_batch(table_name, batch)
            batch = TableBatch()
            partition_key = task.PartitionKey
        if "os" not in task or not task.os:
            config = json.loads(task.config)
            print(config)
            if "windows" in config["vm"]["image"].lower():
                task["os"] = "windows"
            else:
                task["os"] = "linux"
            count += 1
            batch.merge_entity(task)
    table_service.commit_batch(table_name, batch)
    print("migrated %s rows" % count)
def uploadToTable():
    # filename and tablename are assumed to be module-level globals; the
    # original created the table as `fixed_tablename` but committed batches
    # to `tablename`, so a single name is used consistently here.
    connection_string, container_name = get_credentials()
    blob = BlobClient.from_connection_string(conn_str=connection_string,
                                             container_name=container_name,
                                             blob_name=filename)
    blob_data = blob.download_blob().readall()
    json_file_content = blob_data.decode('utf8')
    jlist = json.loads(json_file_content)

    table_service = TableService(connection_string=connection_string)
    table_service.create_table(table_name=tablename)

    partition_key = 'Fishlanding'
    row = 1
    batch_list = []
    for jdict in jlist:
        task = {'PartitionKey': partition_key, 'RowKey': str(row)}
        for key in jdict:
            keyVal = key.replace(' ', '')
            if keyVal == '':
                continue  # was `pass`, which still wrote an empty-string key
            task[keyVal] = jdict[key]
        batch_list.append(task)
        row += 1

    # Chunks of 50 keep each commit under the 100-operation batch limit
    separated_batch_list = list_of_groups(init_list=batch_list,
                                          childern_list_len=50)
    for children_list in separated_batch_list:
        batch = TableBatch()
        for task in children_list:
            batch.insert_or_replace_entity(task)
        table_service.commit_batch(table_name=tablename, batch=batch)
    return batch_list
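list_of_groups is defined elsewhere; a plausible sketch matching the call signature used above (the misspelled keyword childern_list_len is kept so the existing calls still work):

def list_of_groups(init_list, childern_list_len):
    # Hypothetical helper: split init_list into chunks of at most
    # childern_list_len items each.
    return [init_list[i:i + childern_list_len]
            for i in range(0, len(init_list), childern_list_len)]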
def main(event: func.EventGridEvent):
    event_type = event.event_type
    event_subject = event.subject
    containername = event_subject.split("/")[-3]

    # credentials needed
    connection_string = os.getenv('AZURE_CONNECTION_STRING')
    container_name = os.getenv('ContainerName')

    # Only blob creations in the configured container trigger the upload
    if event_type == "Microsoft.Storage.BlobCreated" and containername == container_name:
        filename = event_subject.split("/")[-1]
        tablename = gettablename(
            filename=filename) + datetime.now().strftime("%H%M%S")
        table_service = TableService(connection_string=connection_string)
        table_service.create_table(table_name=tablename)

        # Download the blob data
        blob = BlobClient.from_connection_string(conn_str=connection_string,
                                                 container_name=container_name,
                                                 blob_name=filename)
        blob_data = blob.download_blob().readall()
        json_file_content = blob_data.decode('utf8').replace("'", '"')
        jlist = json.loads(json_file_content)

        # The partition key might be changed later. This is only for
        # DEVELOPMENT purposes.
        partition_key = tablename
        row = 1
        batch_list = []
        for jdict in jlist:
            if 'Cruise_ID' in jdict:
                partition_key = jdict['Cruise_ID']
            task = {'PartitionKey': partition_key, 'RowKey': str(row)}
            for key in jdict:
                keyVal = key.replace(' ', '')
                if keyVal == '':
                    continue
                task[keyVal] = jdict[key]
            batch_list.append(task)
            row += 1

        # Caveat: all entities in one batch must share a PartitionKey, so a
        # commit will fail if Cruise_ID varies within a 50-record chunk.
        separated_batch_list = list_of_groups(init_list=batch_list,
                                              childern_list_len=50)
        for children_list in separated_batch_list:
            batch = TableBatch()
            for task in children_list:
                batch.insert_or_replace_entity(task)
            table_service.commit_batch(table_name=tablename, batch=batch)
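gettablename is also defined elsewhere; one plausible sketch, assuming it derives a valid table name from the blob filename (Azure table names must be alphanumeric and begin with a letter):

def gettablename(filename):
    # Hypothetical helper: strip the extension and any non-alphanumeric
    # characters so the result is a legal Azure table name prefix.
    stem = filename.rsplit('.', 1)[0]
    return ''.join(c for c in stem if c.isalnum())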
def sendEntities():
    # Connection string redacted: the original embedded a full storage
    # account key in source code, which should never be committed.
    table_service = TableService(
        connection_string='<storage account connection string>')
    batch = TableBatch()
    inBatch = 0
    for a in bitcoins:
        batch.insert_or_replace_entity(a)
        inBatch += 1
        # A table batch holds at most 100 operations, so flush early
        if inBatch > 99:
            table_service.commit_batch('HelpPage', batch)
            batch = TableBatch()
            inBatch = 0
    # Commit whatever is left, avoiding an empty final batch
    if inBatch > 0:
        table_service.commit_batch('HelpPage', batch)
def sendEntities():
    table_service = TableService(connection_string='')
    batch = TableBatch()
    inBatch = 0
    for a in bitcoins:
        batch.insert_or_replace_entity(a)
        inBatch += 1
        # Flush every 100 entities to stay within the batch limit
        if inBatch > 99:
            table_service.commit_batch('CryptoCompareAPIData', batch)
            batch = TableBatch()
            inBatch = 0
    # Commit whatever is left, avoiding an empty final batch
    if inBatch > 0:
        table_service.commit_batch('CryptoCompareAPIData', batch)
def save(self, storage_account: str, key: str) -> None:
    from azure.cosmosdb.table.tableservice import TableService
    from azure.cosmosdb.table.tablebatch import TableBatch

    # Create the table storage object
    ts = TableService(account_name=storage_account, account_key=key)

    # Create the records from the data
    records = (self.data
               .assign(date_retrieved=lambda df: df['date_retrieved'].dt.date)
               .rename(columns={'date_retrieved': 'PartitionKey',
                                'date_forecast': 'RowKey'})
               .astype(str)
               .to_dict(orient='records'))

    # Add the data to the batch and save it
    batch = TableBatch()
    for record in records:
        batch.insert_entity(record)
    ts.commit_batch('WeatherNetworkForecasts', batch)
def uploadentities():
    table_service = TableService(
        connection_string=tablestorageconnectionstring)
    batch = TableBatch()
    inBatch = 0
    for e in entities:
        batch.insert_or_replace_entity(e)
        inBatch += 1
        if inBatch > 99:
            print("Sending batch...")
            table_service.commit_batch('PredictionData', batch)
            print("Batch sent!")
            batch = TableBatch()
            inBatch = 0
    # Commit whatever is left, avoiding an empty final batch
    if inBatch > 0:
        print("Sending batch...")
        table_service.commit_batch('PredictionData', batch)
        print("Batch sent!")
def main(mytimer: func.TimerRequest) -> None:
    logging.info('Running server cleanup...')
    table_name = 'servers'
    table_service = TableService(
        connection_string=os.environ['AzureWebJobsStorage'])
    batch = TableBatch()
    servers = table_service.query_entities(table_name)
    for server in servers:
        last_heartbeat = server.lastHeartbeatTime
        current_time = datetime.now(timezone.utc)
        seconds_since_last_heartbeat = current_time - last_heartbeat
        if seconds_since_last_heartbeat.total_seconds() >= int(
                os.environ['HeartbeatTimeoutSeconds']):
            batch.delete_entity(server.PartitionKey, server.RowKey)
    # Caveat: a single batch accepts at most 100 operations, all on the same
    # partition key; see the more defensive variant sketched below.
    table_service.commit_batch(table_name, batch)
    logging.info('Server clean complete!')
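The single commit above fails once expired servers span more than one partition key or exceed 100 entities. A more defensive delete loop, as a sketch under assumed names rather than part of the original function:

from itertools import groupby

def delete_expired_servers(table_service, table_name, expired):
    # Hypothetical helper: sort by partition key, then commit one batch per
    # partition, flushing every 100 deletes to respect batch constraints.
    expired = sorted(expired, key=lambda s: s.PartitionKey)
    for partition_key, group in groupby(expired, key=lambda s: s.PartitionKey):
        batch = TableBatch()
        n = 0
        for server in group:
            batch.delete_entity(partition_key, server.RowKey)
            n += 1
            if n == 100:
                table_service.commit_batch(table_name, batch)
                batch = TableBatch()
                n = 0
        if n > 0:
            table_service.commit_batch(table_name, batch)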
def main(req: func.HttpRequest) -> func.HttpResponse:
    logging.info('Starting bulk insert.')
    ret = dict()
    table_name = req.headers.get('name')
    values = req.get_json()
    if table_name:
        retrieved_secret = getConnectionString()
        table_service = TableService(connection_string=retrieved_secret.value)
        # Note: assumes at most 100 rows, all sharing one partition key
        batch = TableBatch()
        for row in values['rows']:
            batch.insert_entity(row)
        table_service.commit_batch(table_name, batch)
        ret['result'] = "Success"
        return func.HttpResponse(json.dumps(ret), status_code=200)
    else:
        ret['result'] = 'Error'
        return func.HttpResponse(json.dumps(ret), status_code=400)
class AzureTable(object):
    def __init__(self, account_name: str, account_key: str, table_name: str,
                 partition_key_field: str, clustering_key_field: str):
        self.table = TableService(account_name=account_name,
                                  account_key=account_key)
        self.table_name = table_name  # was `self.table_name`, a self-assignment bug
        self.partition_key_field = partition_key_field
        self.clustering_key_field = clustering_key_field

    @property
    def partition_key_name(self) -> str:
        return 'PartitionKey'

    @property
    def clustering_key_name(self) -> str:
        return 'RowKey'

    def get_payload(self, payload: dict):
        item = deepcopy(payload)
        partition_key = payload.get(self.partition_key_field)
        clustering_key = payload.get(self.clustering_key_field)
        if partition_key is None:
            raise PartitionKeyNotFoundError(
                'payload={} does not have a partition key'.format(payload))
        if clustering_key is None:
            raise ClusteringKeyNotFoundError(
                'payload={} does not have a clustering key'.format(payload))
        item.update({
            self.partition_key_name: partition_key,
            self.clustering_key_name: clustering_key
        })
        return item

    def create(self):
        return self.table.create_table(self.table_name)

    def insert(self, item: dict):
        return self.table.insert_entity(self.table_name,
                                        self.get_payload(item))

    def update(self, item: dict):
        pass

    def upsert(self, item: dict):
        pass

    def delete(self, key: str):
        pass

    def read(self, key: str):
        pass

    def insert_batch(self, items: list):
        batch = TableBatch()
        for item in items:
            batch.insert_entity(self.get_payload(item))
        return self.table.commit_batch(self.table_name, batch)

    def get(self, partition_key: str, clustering_key: str):
        return self.table.get_entity(self.table_name, partition_key,
                                     clustering_key)

    def get_by_partition(self, partition_key: str) -> list:
        return self.table.query_entities(
            self.table_name,
            filter="{} eq '{}'".format(self.partition_key_name,
                                       partition_key))
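A hypothetical usage of the class above; the account credentials, table name and field names are made up for illustration. Note that insert_batch only works if all items map to the same partition key:

table = AzureTable(account_name='myaccount',
                   account_key='<key>',
                   table_name='sensors',
                   partition_key_field='device_id',
                   clustering_key_field='reading_time')
table.create()
table.insert_batch([
    {'device_id': 'dev-1', 'reading_time': '2020-01-01T00:00:00Z', 'temp': 21.5},
    {'device_id': 'dev-1', 'reading_time': '2020-01-01T00:05:00Z', 'temp': 21.7},
])
rows = table.get_by_partition('dev-1')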
batch = TableBatch()
task008 = {
    'PartitionKey': 'tasksTigard',
    'RowKey': '008',
    'description': 'Go grocery shopping',
    'priority': 400
}
task009 = {
    'PartitionKey': 'tasksTigard',
    'RowKey': '009',
    'description': 'Clean the bathroom',
    'priority': 100
}
batch.insert_entity(task008)
batch.insert_entity(task009)
table_service.commit_batch('tasktable', batch)

task010 = {
    'PartitionKey': 'tasksTigard',
    'RowKey': '010',
    'description': 'Go grocery shopping',
    'priority': 400
}
task011 = {
    'PartitionKey': 'tasksTigard',
    'RowKey': '011',
    'description': 'Clean the bathroom',
    'priority': 100
}
# The context-manager form commits the batch automatically on exit
with table_service.batch('tasktable') as batch:
    batch.insert_entity(task010)
    batch.insert_entity(task011)
def main(myblob: func.InputStream):
    logging.info(
        f"Python blob trigger function processed blob \n"
        f"Name: {myblob.name}\n"
        f"Blob Size: {myblob.length} bytes"
    )
    postcode_data = pandas.read_csv(
        "https://raw.githubusercontent.com/Elkfox/Australian-Postcode-Data/master/au_postcodes.csv",
        dtype={"postcode": str},
    )
    df = pandas.read_excel(
        io.BytesIO(myblob.read()),
        engine="openpyxl",
        true_values=("Y",),
        false_values=("n",),  # was ("n"), a plain string rather than a tuple
    )
    df.dropna(subset=["ABN"], inplace=True)  # Remove entries with no ABN
    df.drop_duplicates(
        subset=["ABN"], keep="last", inplace=True
    )  # Remove duplicate entries and keep the last
    df["Town_City"] = df["Town_City"].apply(lambda x: str(x).title())
    df["Reporting_hours___Paid"].fillna(0, inplace=True)
    df["Reporting_hours___Unpaid"].fillna(0, inplace=True)
    df["Reporting_hours___Total"].fillna(0, inplace=True)
    check_columns = (
        "Operates_in_ACT",
        "Operates_in_NSW",
        "Operates_in_NT",
        "Operates_in_QLD",
        "Operates_in_SA",
        "Operates_in_TAS",
        "Operates_in_VIC",
        "Operates_in_WA",
        "Relief_of_poverty_sickness_or_the_needs_of_the_aged",
        "The_advancement_of_education",
        "The_advancement_of_religion",
        "The_provision_of_child_care_services",
        "Other_purposes_beneficial_to_the_community",
        "BASIC_RELIGIOUS",
        "Conducted_Activities",
        "Animal_Protection",
        "Aged_Care_Activities",
        "Civic_and_advocacy_activities",
        "Culture_and_arts",
        "Economic_social_and_community_development",
        "Emergency_Relief",
        "Employment_and_training",
        "Environmental_activities",
        "Grant_making_activities",
        "Higher_education",
        "Hospital_services_and_rehabilitation_activities",
        "Housing_activities",
        "Income_support_and_maintenance",
        "International_activities",
        "Law_and_legal_services",
        "Mental_health_and_crisis_intervention",
        "Political_activities",
        "Primary_and_secondary_education",
        "Religious_activities",
        "Research",
        "Social_services",
        "Sports",
        "Other_Educations",
        "other_health_service_delivery",
        "Other_recreation_and_social_club_activity",
        "Other",
        "Aboriginal_or_TSI",
        "Aged_Persons",
        "Children",
        "Communities_Overseas",
        "Ethnic_Groups",
        "Gay_Lesbian_Bisexual",
        "General_Community_in_Australia",
        "men",
        "Migrants_Refugees_or_Asylum_Seekers",
        "Pre_Post_Release_Offenders",
        "People_with_Chronic_Illness",
        "People_with_Disabilities",
        "People_at_risk_of_homelessness",
        "Unemployed_Persons",
        "Veterans_or_their_families",
        "Victims_of_crime",
        "Victims_of_Disasters",
        "Women",
        "Youth",
        "Other_charities",
        "Other_beneficiaries_not_listed",
        "Reporting_Obligations___ACT",
        "Reporting_Obligations___NSW",
        "Reporting_Obligations___NT",
        "Reporting_Obligations___QLD",
        "Reporting_Obligations___SA",
        "Reporting_Obligations___TAS",
        "Reporting_Obligations___VIC",
        "Reporting_Obligations___WA",
    )
    col_names = list(df.columns)
    for col in check_columns:
        if col in col_names:
            df[col].fillna(False, inplace=True)
    df["State"] = df["Postcode"].apply(map_postcode)  # map_postcode defined elsewhere
    df = pandas.merge(
        df,
        postcode_data,
        left_on=["Postcode", "Town_City"],
        right_on=["postcode", "place_name"],
        how="left",
    ).drop(columns=["postcode", "place_name", "state_name", "state_code", "accuracy"])

    # Connect to Azure Table Store
    table_service = TableService(
        account_name=os.getenv("TABLE_STORAGE_ACCOUNT_NAME"),
        account_key=os.getenv("TABLE_STORAGE_KEY"),
    )

    # Convert dataframe to a list of dictionaries
    records = df.to_dict(orient="records")

    # Batch in 100 rows
    for record_batch in (
        records[pos : pos + 100] for pos in range(0, len(records), 100)
    ):
        batch = TableBatch()
        for record in record_batch:
            entry = {
                "PartitionKey": "ais",
                "RowKey": str(record["ABN"]),
            }
            for k, v in record.items():
                cleanCamelKey = camelKey(k)
                # Check that removing those characters didn't create a duplicate key
                if cleanCamelKey in entry:
                    raise ValueError(f"Key collision on {cleanCamelKey}")
                entry[cleanCamelKey] = v
            batch.insert_or_replace_entity(entry)
        table_service.commit_batch("ACN", batch)
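camelKey is not defined in the snippet; a plausible sketch, assuming it camel-cases the underscore-separated column names and strips characters that are illegal in Table storage property names:

def camelKey(key):
    # Hypothetical helper: turn e.g. "Reporting_hours___Paid" into
    # "ReportingHoursPaid" by splitting on underscores and dropping
    # any characters that are not alphanumeric.
    parts = [''.join(c for c in part if c.isalnum())
             for part in str(key).split('_')]
    return ''.join(part[:1].upper() + part[1:] for part in parts if part)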
from azure.cosmosdb.table.tableservice import TableService
from azure.cosmosdb.table.models import Entity
# Let's import the batch library
from azure.cosmosdb.table.tablebatch import TableBatch

# Connect to the Table service (connect_str and table_name are assumed to be
# defined earlier)
table_service = TableService(connection_string=connect_str)
print("\nConnected to Table service")

# Let's perform a batch transaction, and add two course expiry dates
batch = TableBatch()
task001 = {
    'PartitionKey': 'Azure',
    'RowKey': '280',
    'expiryDate': '1 July 2020'
}
task002 = {
    'PartitionKey': 'Azure',
    'RowKey': '381',
    'expiryDate': '1 July 2020'
}

# Perform batch transaction using MERGE (could be update, insert, etc.)
batch.merge_entity(task001)
batch.merge_entity(task002)
table_service.commit_batch(table_name, batch)
print("\nBatch transaction complete")
print("=======================")