import datetime
import os

import pandas
from azure.kusto.data import KustoConnectionStringBuilder
from azure.kusto.ingest import DataFormat, IngestionProperties, QueuedIngestClient, ReportLevel


def write_to_db(binary_size_data, args):
    # Connect to the database.
    cluster = "https://ingest-onnxruntimedashboarddb.southcentralus.kusto.windows.net"
    kcsb = KustoConnectionStringBuilder.with_az_cli_authentication(cluster)
    # The authentication method is taken from the chosen KustoConnectionStringBuilder.
    client = QueuedIngestClient(kcsb)
    fields = ["build_time", "build_id", "build_project", "commit_id", "os", "arch", "build_config", "size", "Branch"]
    now_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    branch_name = os.environ.get("BUILD_SOURCEBRANCHNAME", "main")
    rows = []
    for row in binary_size_data:
        rows.append(
            [
                now_str,
                args.build_id,
                args.build_project,
                args.commit_hash,
                row["os"],
                row["arch"],
                row["build_config"],
                row["size"],
                branch_name.lower(),
            ]
        )
    ingestion_props = IngestionProperties(
        database="powerbi",
        table="binary_size",
        data_format=DataFormat.CSV,
        report_level=ReportLevel.FailuresAndSuccesses,
    )
    df = pandas.DataFrame(data=rows, columns=fields)
    client.ingest_from_dataframe(df, ingestion_properties=ingestion_props)
def write_to_db(coverage_data, args):
    # Connect to the database.
    cluster = "https://ingest-onnxruntimedashboarddb.southcentralus.kusto.windows.net"
    kcsb = KustoConnectionStringBuilder.with_az_cli_authentication(cluster)
    # The authentication method is taken from the chosen KustoConnectionStringBuilder.
    client = QueuedIngestClient(kcsb)
    fields = [
        "UploadTime",
        "CommitId",
        "Coverage",
        "LinesCovered",
        "TotalLines",
        "OS",
        "Arch",
        "BuildConfig",
        "ReportURL",
        "Branch",
    ]
    now_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    rows = [
        [
            now_str,
            args.commit_hash,
            coverage_data["coverage"],
            coverage_data["lines_covered"],
            coverage_data["lines_valid"],
            args.os.lower(),
            args.arch.lower(),
            args.build_config.lower(),
            args.report_url.lower(),
            args.branch.lower(),
        ]
    ]
    ingestion_props = IngestionProperties(
        database="powerbi",
        table="test_coverage",
        data_format=DataFormat.CSV,
        report_level=ReportLevel.FailuresAndSuccesses,
    )
    df = pandas.DataFrame(data=rows, columns=fields)
    client.ingest_from_dataframe(df, ingestion_properties=ingestion_props)
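# For context, a hypothetical sketch of driving the coverage uploader from a CI
# step. The flag names below are illustrative assumptions, not flags the
# original script is known to define.
import argparse

parser = argparse.ArgumentParser(description="Upload test coverage numbers to Kusto")
parser.add_argument("--commit_hash", required=True)
parser.add_argument("--os", required=True)
parser.add_argument("--arch", required=True)
parser.add_argument("--build_config", required=True)
parser.add_argument("--report_url", required=True)
parser.add_argument("--branch", required=True)
args = parser.parse_args()

# coverage_data mirrors the keys write_to_db reads: coverage, lines_covered, lines_valid.
coverage_data = {"coverage": 0.85, "lines_covered": 17000, "lines_valid": 20000}
write_to_db(coverage_data, args)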
def test_simple_ingest_from_dataframe(self, mock_pid, mock_time, mock_uuid, mock_put_message_in_queue, mock_upload_blob_from_stream):
    # (The mock_* arguments are injected by @patch decorators omitted from this excerpt.)
    responses.add_callback(
        responses.POST,
        "https://ingest-somecluster.kusto.windows.net/v1/rest/mgmt",
        callback=request_callback,
        content_type="application/json",
    )

    ingest_client = QueuedIngestClient("https://ingest-somecluster.kusto.windows.net")
    ingestion_properties = IngestionProperties(database="database", table="table", data_format=DataFormat.CSV)

    from pandas import DataFrame

    fields = ["id", "name", "value"]
    rows = [[1, "abc", 15.3], [2, "cde", 99.9]]
    df = DataFrame(data=rows, columns=fields)

    ingest_client.ingest_from_dataframe(df, ingestion_properties=ingestion_properties)

    # mock_put_message_in_queue
    assert mock_put_message_in_queue.call_count == 1

    put_message_in_queue_mock_kwargs = mock_put_message_in_queue.call_args_list[0][1]
    queued_message_json = json.loads(put_message_in_queue_mock_kwargs["content"])
    expected_url = "https://storageaccount.blob.core.windows.net/tempstorage/database__table__1111-111111-111111-1111__df_{0}_100_1111-111111-111111-1111.csv.gz?".format(
        id(df)
    )

    # mock_upload_blob_from_stream
    # Not checking the query string because its parameter order can change; just check it's present.
    assert queued_message_json["BlobPath"].startswith(expected_url)
    assert len(queued_message_json["BlobPath"]) > len(expected_url)
    assert queued_message_json["DatabaseName"] == "database"
    assert queued_message_json["IgnoreSizeLimit"] is False
    assert queued_message_json["AdditionalProperties"]["format"] == "csv"
    assert queued_message_json["FlushImmediately"] is False
    assert queued_message_json["TableName"] == "table"
    assert queued_message_json["RawDataSize"] > 0
    assert queued_message_json["RetainBlobOnSuccess"] is True

    upload_blob_kwargs = mock_upload_blob_from_stream.call_args_list[0][1]
    assert type(upload_blob_kwargs["data"]) == io.BufferedReader
import json
import sys
import threading
import traceback
from datetime import datetime, timedelta

from pandas import DataFrame
from azure.kusto.data import KustoClient, KustoConnectionStringBuilder
from azure.kusto.ingest import IngestionProperties, QueuedIngestClient

# IOutput, KqlOutput, Dictionary, and JsonConvert are .NET interop types
# supplied by the hosting Real-Time KQL runtime; their imports are
# environment-specific and not shown here.


class PythonAdxOutput(IOutput):
    """
    An output component that ingests events to Azure Data Explorer (ADX) using queued ingestion.

    ...

    Attributes
    ----------
    cluster : str
        Azure Data Explorer (ADX) cluster address, e.g. 'CDOC.kusto.windows.net'.
    database : str
        Azure Data Explorer (ADX) database name, e.g. 'TestDb'.
    table : str
        Azure Data Explorer (ADX) table name, e.g. 'OutputTable'.
    clientId : str
        Azure Data Explorer (ADX) client id that has permissions to access ADX.
    clientSecret : str
        Azure Data Explorer (ADX) access key. Used along with the client id.
    authority : str
        Azure Data Explorer (ADX) authority. Optional; defaults to 'microsoft.com'.
    resetTable : bool
        Default is False. If True, the existing data in the destination table is dropped before new data is logged.
    """

    __namespace__ = "KqlPython"

    def __init__(self, cluster, database, table, clientId, clientSecret, authority="microsoft.com", resetTable=False):
        """
        Parameters
        ----------
        cluster : str
            Azure Data Explorer (ADX) cluster address, e.g. 'CDOC.kusto.windows.net'.
        database : str
            Azure Data Explorer (ADX) database name, e.g. 'TestDb'.
        table : str
            Azure Data Explorer (ADX) table name, e.g. 'OutputTable'.
        clientId : str
            Azure Data Explorer (ADX) client id that has permissions to access ADX.
        clientSecret : str
            Azure Data Explorer (ADX) access key. Used along with the client id.
        authority : str
            Azure Data Explorer (ADX) authority. Optional; defaults to 'microsoft.com'.
        resetTable : bool
            Default is False. If True, the existing data in the destination table is dropped before new data is logged.
        """
        self.running = True
        self.batchSize = 10000
        self.flushDuration = timedelta(milliseconds=1000)
        self.lastUploadTime = datetime.utcnow()
        self.initTable = False
        self.nextBatch = list()
        self.currentBatch = None
        self.lock = threading.Lock()
        self.resetTable = resetTable
        self.database = database
        self.table = table
        self.kcsbData = KustoConnectionStringBuilder.with_aad_application_key_authentication(
            f"https://{cluster}:443/", clientId, clientSecret, authority
        )
        self.kcsbIngest = KustoConnectionStringBuilder.with_aad_application_key_authentication(
            f"https://ingest-{cluster}:443/", clientId, clientSecret, authority
        )
        self.dataClient = KustoClient(self.kcsbData)
        self.ingestClient = QueuedIngestClient(self.kcsbIngest)
        self.ingestionProps = IngestionProperties(database=database, table=table)

    def KqlOutputAction(self, kqlOutput: KqlOutput):
        """Outputs events that have been processed by a KQL query."""
        self.OutputAction(kqlOutput.Output)

    def OutputAction(self, dictOutput: Dictionary):
        """Outputs events either to console or to a custom function."""
        try:
            if self.running:
                # Convert the C# Dictionary to a Python dict via JSON.
                txt = JsonConvert.SerializeObject(dictOutput)
                newEvent = json.loads(txt)

                # Initialize the table on first use.
                if not self.initTable:
                    self.CreateOrResetTable(newEvent)
                    self.initTable = True

                # Check whether it's time to upload a batch.
                if (len(self.nextBatch) >= self.batchSize) or (datetime.utcnow() > self.lastUploadTime + self.flushDuration):
                    self.UploadBatch()

                self.nextBatch.append(newEvent)
        except Exception:
            self.running = False
            print(sys.exc_info())
            traceback.print_exc()

    def OutputError(self, error):
        """Outputs errors to console."""
        self.running = False
        print(error)

    def OutputCompleted(self):
        """Signals the end of the input event stream."""
        if self.running:
            self.UploadBatch()
        self.running = False

    def Stop(self):
        """Signals end of program."""
        print('\nCompleted!')
        print('\nThank you for using Real-time KQL!')

    def UploadBatch(self):
        """Ingests a batch of events to Kusto using queued ingestion."""
        self.lock.acquire()
        try:
            if self.currentBatch is not None:
                raise Exception('Upload must not be called before the batch currently being uploaded is completed')

            self.currentBatch = self.nextBatch
            self.nextBatch = list()

            if len(self.currentBatch) > 0:
                df = DataFrame(self.currentBatch)
                self.ingestClient.ingest_from_dataframe(df, ingestion_properties=self.ingestionProps)
                print(f"{len(self.currentBatch)},", end=" ")

            self.currentBatch = None
            self.lastUploadTime = datetime.utcnow()
        except Exception:
            self.running = False
            print(sys.exc_info())
            traceback.print_exc()
        finally:
            self.lock.release()

    def CreateOrResetTable(self, data):
        """Creates or resets the ADX table."""
        if self.resetTable:
            # Drop the table if it exists.
            self.dataClient.execute(self.database, f".drop table {self.table} ifexists")

        # Build the schema from the first event and create-merge the table.
        tableMapping = "("
        for item in data:
            tableMapping += f"{item}: {self.GetColumnType(data[item])}, "
        tableMapping = tableMapping[:-2] + ")"
        createMergeTableCommand = f".create-merge table {self.table} " + tableMapping
        self.dataClient.execute(self.database, createMergeTableCommand)

    def GetColumnType(self, item):
        """Returns the Kusto data type string equivalent of a Python object."""
        if isinstance(item, str):
            return "string"
        elif isinstance(item, bool):
            return "bool"
        elif isinstance(item, datetime):
            return "datetime"
        elif isinstance(item, timedelta):
            return "timespan"
        elif isinstance(item, (int, bytes, bytearray)):
            return "int"
        elif isinstance(item, float):
            return "real"
        else:
            return "dynamic"
# Ingest from blob.
blob_descriptor = BlobDescriptor(
    "https://{path_to_blob}.csv.gz?sp=rl&st=2020-05-20T13:38:37Z&se=2020-05-21T13:38:37Z&sv=2019-10-10&sr=c&sig=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
    10,
)  # 10 is the raw size of the data in bytes.
client.ingest_from_blob(blob_descriptor, ingestion_properties=ingestion_props)

# Ingest from dataframe.
import pandas

fields = ["id", "name", "value"]
rows = [[1, "abc", 15.3], [2, "cde", 99.9]]
df = pandas.DataFrame(data=rows, columns=fields)
client.ingest_from_dataframe(df, ingestion_properties=ingestion_props)

# Ingest a whole folder.
import os

path = "folder/path"
# os.listdir returns bare file names, so join each with the folder path.
for f in os.listdir(path):
    client.ingest_from_file(os.path.join(path, f), ingestion_properties=ingestion_props)

##################################################################
##                       INGESTION STATUS                       ##
##################################################################

# if status updates are required, something like this can be done
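# A sketch of polling the ingestion status queues via the SDK's
# KustoIngestStatusQueues helper. This assumes ingestion_props was created with
# report_level=ReportLevel.FailuresAndSuccesses, as above, so that success
# reports are posted in addition to failures.
import time

from azure.kusto.ingest.status import KustoIngestStatusQueues

qs = KustoIngestStatusQueues(client)

MAX_BACKOFF = 180
backoff = 1
while True:
    if qs.success.is_empty() and qs.failure.is_empty():
        time.sleep(backoff)
        backoff = min(backoff * 2, MAX_BACKOFF)  # exponential backoff while idle
        continue

    backoff = 1
    # Pop up to ten reports from each queue and print them.
    for msg in qs.success.pop(10):
        print("SUCCESS: {}".format(msg))
    for msg in qs.failure.pop(10):
        print("FAILURE: {}".format(msg))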