def write_rates_df_to_table(table_name: str, table: pd.DataFrame, account_name: str = None, account_key: str = None, connection_string: str = None): # Set up connection to table service table_service = __connect(table_name, account_name, account_key, connection_string) # Specify PartitonKey and RowKey table["PartitionKey"] = table["Date"] table["Date"] = pd.to_datetime(table["Date"]) table["RowKey"] = table["country_code"] + "_" + table["Maturity"].astype( str) # Iterate through each PartitionKey and insert the rows into batch and submit to table service for _, partition_df in table.groupby( ["PartitionKey", np.arange(len(table)) // 100]): batch = TableBatch() rates_list = json.loads( partition_df.to_json(date_format="iso", orient="records")) for rate in rates_list: batch.insert_or_replace_entity(rate) table_service.commit_batch(table_name, batch)
def remove_entry_for_resource(subscription_id, row_keys, table_service): batch = TableBatch() for row_key in row_keys: batch.delete_entity(subscription_id, row_key) table_service.commit_batch(REQUIRED_TAGS_TABLE_NAME, batch)
def uploadToTable(): connection_string, container_name = get_credentials() blob = BlobClient.from_connection_string(conn_str=connection_string, container_name=container_name, blob_name=filename) blob_data = blob.download_blob().readall() json_file_content = blob_data.decode('utf8') jlist = json.loads(json_file_content) table_service = TableService(connection_string=connection_string) table_service.create_table(table_name=fixed_tablename) partition_key = 'Fishlanding' row = 1 batch_list = [] for jdict in jlist: task = {'PartitionKey': partition_key, 'RowKey': str(row)} for key in jdict: keyVal = key.replace(' ', '') if keyVal == '': pass task[keyVal] = jdict[key] batch_list.append(task) row += 1 seperated_batch_list = list_of_groups(init_list=batch_list, childern_list_len=50) for children_list in seperated_batch_list: batch = TableBatch() for task in children_list: batch.insert_or_replace_entity(task) table_service.commit_batch(table_name=tablename, batch=batch) return batch_list
def upload_data_to_azure(table_service): global data # Create table if it doesn't exist table_service.create_table(dataset_table_name, fail_on_exist=False) batch = TableBatch() for d in data: batch.insert_entity(d) table_service.commit_batch(dataset_table_name, batch) data = []
def main(event: func.EventGridEvent): event_type = event.event_type event_subject = event.subject containername = event_subject.split("/")[-3] # credentials needed connection_string = os.getenv('AZURE_CONNECTION_STRING') container_name = os.getenv('ContainerName') # Only blob creations at container would trigger this function if event_type == "Microsoft.Storage.BlobCreated" and containername == container_name: filename = event_subject.split("/")[-1] tablename = gettablename( filename=filename) + datetime.now().strftime("%H%M%S") table_service = TableService(connection_string=connection_string) table_service.create_table(table_name=tablename) # Download the blob data blob = BlobClient.from_connection_string(conn_str=connection_string, container_name=container_name, blob_name=filename) blob_data = blob.download_blob().readall() json_file_content = blob_data.decode('utf8').replace("'", '"') jlist = json.loads(json_file_content) # The partition key might be changed later. This is only for DEVELOPMENT purpose partition_key = tablename row = 1 batch_list = [] for jdict in jlist: if 'Cruise_ID' in jdict: partition_key = jdict['Cruise_ID'] task = {'PartitionKey': partition_key, 'RowKey': str(row)} for key in jdict: keyVal = key.replace(' ', '') if keyVal == '': continue task[keyVal] = jdict[key] batch_list.append(task) row += 1 seperated_batch_list = list_of_groups(init_list=batch_list, childern_list_len=50) for children_list in seperated_batch_list: batch = TableBatch() for task in children_list: batch.insert_or_replace_entity(task) table_service.commit_batch(table_name=tablename, batch=batch)
def batch_entry(): batch = TableBatch() order004 = { 'PartitionKey': 'ordersSeattle', 'RowKey': '004', 'description': 'Go grocery shopping', 'priority': 400 } order005 = { 'PartitionKey': 'ordersSeattle', 'RowKey': '005', 'description': 'Clean the bathroom', 'priority': 100 } batch.insert_entity(order004) batch.insert_entity(order005) table_service.commit_batch('ordertable', batch) order006 = { 'PartitionKey': 'ordersSeattle', 'RowKey': '006', 'description': 'Go grocery shopping', 'priority': 400 } order010 = { 'PartitionKey': 'ordersSeattle', 'RowKey': '007', 'description': 'Clean the bathroom', 'priority': 100 } with table_service.batch('ordertable') as batch: batch.insert_entity(order006) batch.insert_entity(order007)
def migrate_task_os(table_service: TableService) -> None: table_name = "Task" tasks = table_service.query_entities( table_name, select="PartitionKey,RowKey,os,config") partitionKey = None count = 0 batch = TableBatch() for task in tasks: if partitionKey != task.PartitionKey: table_service.commit_batch(table_name, batch) batch = TableBatch() partitionKey = task.PartitionKey if "os" not in task or (not task.os): config = json.loads(task.config) print(config) if "windows".lower() in config["vm"]["image"].lower(): task["os"] = "windows" else: task["os"] = "linux" count = count + 1 batch.merge_entity(task) table_service.commit_batch(table_name, batch) print("migrated %s rows" % count)
def main(req: func.HttpRequest) -> func.HttpResponse: logging.info('Starting bulk insert.') ret = dict() table_name = req.headers.get('name') values = req.get_json() if table_name: retrieved_secret = getConnectionString() table_service = TableService(connection_string=retrieved_secret.value) batch = TableBatch() for i in range(0, len(values['rows'])): batch.insert_entity(values['rows'][i]) table_service.commit_batch(table_name, batch) ret['result'] = "Success" return func.HttpResponse(json.dumps(ret), status_code=200) else: ret['result'] = 'Error' return func.HttpResponse(json.dumps(ret), status_code=400)
def sendEntities(): table_service = TableService( connection_string='') batch = TableBatch() inBatch = 0 for a in bitcoins: batch.insert_or_replace_entity(a) inBatch += 1 if inBatch > 99: table_service.commit_batch('CryptoCompareAPIData', batch) batch = TableBatch() inBatch = 0 table_service.commit_batch('CryptoCompareAPIData', batch) batch = TableBatch() inBatch = 0
def sendEntities(): table_service = TableService( connection_string='DefaultEndpointsProtocol=https;AccountName=sauokgp;AccountKey=113mdwUqIiqt4K2HonK80HakIOplxYZINmQME5KB1IZfP+v3JHZK64wpoTP5NBFaG0MaO/TVqA0nW4KuCINTow==;EndpointSuffix=core.windows.net') batch = TableBatch() inBatch = 0 for a in bitcoins: batch.insert_or_replace_entity(a) inBatch += 1 if inBatch > 99: table_service.commit_batch('HelpPage', batch) batch = TableBatch() inBatch = 0 table_service.commit_batch('HelpPage', batch) batch = TableBatch() inBatch = 0
def uploadentities(): table_service = TableService( connection_string=tablestorageconnectionstring) batch = TableBatch() inBatch = 0 for e in entities: batch.insert_or_replace_entity(e) inBatch += 1 if inBatch > 99: print("Sending batch...") table_service.commit_batch('PredictionData', batch) print("Batch sent!") batch = TableBatch() inBatch = 0 print("Sending batch...") table_service.commit_batch('PredictionData', batch) print("Batch sent!") batch = TableBatch() inBatch = 0
def save(self, storage_account: str, key: str) -> None: from azure.cosmosdb.table.tableservice import TableService from azure.cosmosdb.table.models import Entity from azure.cosmosdb.table.tablebatch import TableBatch # Create the table storage object ts = TableService(account_name=storage_account, account_key=key) # Create the records from the data records = (self.data.assign( date_retrieved=lambda df: df['date_retrieved'].dt.date).rename( columns={ 'date_retrieved': 'PartitionKey', 'date_forecast': 'RowKey' }).astype(str).to_dict(orient='records')) # Add the data to the batch and save it batch = TableBatch() list(map(batch.insert_entity, records)) ts.commit_batch('WeatherNetworkForecasts', batch)
def crawl_and_store(table_service, symbol): batch = TableBatch() batch_size = 0 for each_trend in fetch(symbol): batch.insert_or_replace_entity(each_trend) batch_size = batch_size + 1 if batch_size >= 75: table_service.commit_batch('stocktrend', batch) batch = TableBatch() print("stored a batch, size:", batch_size) batch_size = 0 if batch_size > 0: table_service.commit_batch('stocktrend', batch) print("stored a batch, size:", batch_size)
def update_table_storage_based_on_new_data(subscription_id, non_compliant_resources, non_compliant_ids_to_add, non_compliant_ids_to_delete, table_service): """ Makes a batch update by adding / deleting rows from the table storage """ batch = TableBatch() for ncr in non_compliant_resources: if ncr['RowKey'] in non_compliant_ids_to_add: batch.insert_entity(ncr) for row_key in non_compliant_ids_to_delete: batch.delete_entity(subscription_id, row_key) table_service.commit_batch(RECOMMENDATIONS_TABLE_NAME, batch)
table_name = "coursecatalog" connect_str = "MAGIC" from azure.cosmosdb.table.tableservice import TableService from azure.cosmosdb.table.models import Entity # Let's import the batch library from azure.cosmosdb.table.tablebatch import TableBatch # Connect to the Table service table_service = TableService(connection_string=connect_str) print("\nConnected to Table service") # Let's perform a batch transaction, and add two course expiry dates batch = TableBatch() task001 = { 'PartitionKey': 'Azure', 'RowKey': '280', 'expiryDate': '1 July 2020' } task002 = { 'PartitionKey': 'Azure', 'RowKey': '381', 'expiryDate': '1 July 2020' } # Perform batch transaction using MERGE (could be update, insert, etc) batch.merge_entity(task001) batch.merge_entity(task002) table_service.commit_batch(table_name, batch)
def insert_batch(self, items: list): batch = TableBatch() for item in items: batch.insert_entity(self.get_payload(item)) return self.table.commit_batch(self.table_name, batch)
from azure.cosmosdb.table.tableservice import TableService from azure.cosmosdb.table.models import Entity from azure.cosmosdb.table.tablebatch import TableBatch table_service = TableService( account_name='az532rg1diag572', account_key= 'coixsNqJU6lbm2w9yCWh+fODv+NqAFjgV+jHRy4zkZX8ywkrJ+nfawSPNCF0tgzOh8FstVAG4tUu/pOeDwfLEQ==' ) batch = TableBatch() task008 = { 'PartitionKey': 'tasksTigard', 'RowKey': '008', 'description': 'Go grocery shopping', 'priority': 400 } task009 = { 'PartitionKey': 'tasksTigard', 'RowKey': '009', 'description': 'Clean the bathroom', 'priority': 100 } batch.insert_entity(task008) batch.insert_entity(task009) table_service.commit_batch('tasktable', batch) task010 = { 'PartitionKey': 'tasksTigard', 'RowKey': '010', 'description': 'Go grocery shopping',
# Replace the entity created earlier print("Replace task using insert_or_replace... - Take out the garbage again") task = {'PartitionKey': 'tasksSeattle', 'RowKey': '001', 'description': 'Take out the garbage again', 'priority': 250} table_service.insert_or_replace_entity('tasktable', task) # Insert a new entity print("insert or replay rowkey 003 - buy detergent") task = {'PartitionKey': 'tasksSeattle', 'RowKey': '003', 'description': 'Buy detergent', 'priority': 300} table_service.insert_or_replace_entity('tasktable', task) # batch processing - Add multiple entries print("batch processing task 004/005") batch = TableBatch() task004 = {'PartitionKey': 'tasksSeattle', 'RowKey': '004', 'description': 'Go grocery shopping', 'priority': 400} task005 = {'PartitionKey': 'tasksSeattle', 'RowKey': '005', 'description': 'Clean the bathroom', 'priority': 100} batch.insert_entity(task004) batch.insert_entity(task005) table_service.commit_batch('tasktable', batch) # alternative way to use batch, using context print("batch insert using context...") task006 = {'PartitionKey': 'tasksSeattle', 'RowKey': '006', 'description': 'Go grocery shopping', 'priority': 400} task007 = {'PartitionKey': 'tasksSeattle', 'RowKey': '007', 'newCol': 'newColVal1', 'description': 'Clean the bathroom', 'priority': 100}
def main(myblob: func.InputStream): logging.info( f"Python blob trigger function processed blob \n" f"Name: {myblob.name}\n" f"Blob Size: {myblob.length} bytes" ) postcode_data = pandas.read_csv( "https://raw.githubusercontent.com/Elkfox/Australian-Postcode-Data/master/au_postcodes.csv", dtype={"postcode": str}, ) df = pandas.read_excel( io.BytesIO(myblob.read()), engine="openpyxl", true_values=("Y",), false_values=("n"), ) df.dropna(subset=["ABN"], inplace=True) # Remove entries with no ABN df.drop_duplicates( subset=["ABN"], keep="last", inplace=True ) # Remove duplicate entries and keep the last df["Town_City"] = df["Town_City"].apply(lambda x: str(x).title()) df["Reporting_hours___Paid"].fillna(0, inplace=True) df["Reporting_hours___Unpaid"].fillna(0, inplace=True) df["Reporting_hours___Total"].fillna(0, inplace=True) check_columns = ( "Operates_in_ACT", "Operates_in_NSW", "Operates_in_NT", "Operates_in_QLD", "Operates_in_SA", "Operates_in_TAS", "Operates_in_VIC", "Operates_in_WA", "Relief_of_poverty_sickness_or_the_needs_of_the_aged", "The_advancement_of_education", "The_advancement_of_religion", "The_provision_of_child_care_services", "Other_purposes_beneficial_to_the_community", "BASIC_RELIGIOUS", "Conducted_Activities", "Animal_Protection", "Aged_Care_Activities", "Civic_and_advocacy_activities", "Culture_and_arts", "Economic_social_and_community_development", "Emergency_Relief", "Employment_and_training", "Environmental_activities", "Grant_making_activities", "Higher_education", "Hospital_services_and_rehabilitation_activities", "Housing_activities", "Income_support_and_maintenance", "International_activities", "Law_and_legal_services", "Mental_health_and_crisis_intervention", "Political_activities", "Primary_and_secondary_education", "Religious_activities", "Research", "Social_services", "Sports", "Other_Educations", "other_health_service_delivery", "Other_recreation_and_social_club_activity", "Other", "Aboriginal_or_TSI", "Aged_Persons", "Children", "Communities_Overseas", "Ethnic_Groups", "Gay_Lesbian_Bisexual", "General_Community_in_Australia", "men", "Migrants_Refugees_or_Asylum_Seekers", "Pre_Post_Release_Offenders", "People_with_Chronic_Illness", "People_with_Disabilities", "People_at_risk_of_homelessness", "Unemployed_Persons", "Veterans_or_their_families", "Victims_of_crime", "Victims_of_Disasters", "Women", "Youth", "Other_charities", "Other_beneficiaries_not_listed", "Reporting_Obligations___ACT", "Reporting_Obligations___NSW", "Reporting_Obligations___NT", "Reporting_Obligations___QLD", "Reporting_Obligations___SA", "Reporting_Obligations___TAS", "Reporting_Obligations___VIC", "Reporting_Obligations___WA", ) col_names = list(df.columns) for col in check_columns: if col in col_names: df[col].fillna(False, inplace=True) df["State"] = df["Postcode"].apply(map_postcode) df = pandas.merge( df, postcode_data, left_on=["Postcode", "Town_City"], right_on=["postcode", "place_name"], how="left", ).drop(columns=["postcode", "place_name", "state_name", "state_code", "accuracy"]) # Connect to Azure Table Store table_service = TableService( account_name=os.getenv("TABLE_STORAGE_ACCOUNT_NAME"), account_key=os.getenv("TABLE_STORAGE_KEY"), ) # Convert dataframe to list of dictionary records = df.to_dict(orient="records") # Batch in 100 rows for record_batch in ( records[pos : pos + 100] for pos in range(0, len(records), 100) ): batch = TableBatch() for record in record_batch: entry = { "PartitionKey": f"ais", "RowKey": str(record["ABN"]), } for k, v in record.items(): cleanCamelKey = camelKey(k) # Check that removing those characters didn't create a duplicate key if cleanCamelKey in entry: raise ValueError(f"Key collision on {cleanCamelKey}") entry[cleanCamelKey] = v batch.insert_or_replace_entity(entry) table_service.commit_batch("ACN", batch)