def get_all_db_table_records_count(kusto_client: KustoClient, database: str, table_prefix: str = ''): # Count Records from CO Table and TEMP Tables in All Databases query_filter = f"union database(\"*\").CO*|union database(\"*\").TEMP*|count" print(f"query on database: {database} by query_filter: {query_filter}") response = kusto_client.execute(database, query_filter) result = response.primary_results[0] records_count = result.to_dict().get('data')[0] return records_count.get('Count')
def execute_command(cls, kusto_client: KustoClient, database_name: str, command: str) -> bool: """ Executes a Command using a premade client :param kusto_client: Premade client to run Commands. can be either an adminClient or queryClient :param database_name: DB name :param command: The Command to execute :return: True on success, false otherwise """ try: if command.startswith(cls.MGMT_PREFIX): client_request_properties = cls.create_client_request_properties( "Python_SampleApp_ControlCommand") else: client_request_properties = cls.create_client_request_properties( "Python_SampleApp_Query") result = kusto_client.execute(database_name, command, client_request_properties) print(f"Response from executed command '{command}':") for row in result.primary_results[0]: print(row.to_list()) return True except KustoClientError as ex: Utils.error_handler( f"Client error while trying to execute command '{command}' on database '{database_name}'", ex) except KustoServiceError as ex: Utils.error_handler( f"Server error while trying to execute command '{command}' on database '{database_name}'", ex) except Exception as ex: Utils.error_handler( f"Unknown error while trying to execute command '{command}' on database '{database_name}'", ex) return False
def get_dataset(): in_use = microsoft_config kcsb = KustoConnectionStringBuilder.with_aad_application_key_authentication( in_use['cluster'], in_use['client_id'], in_use["client_secret"], in_use["authority_id"]) query = ''' RawEventsAzCli | where EventTimestamp > ago(2h) and EventTimestamp < ago(1h) // Remove pre 2.0.28 telemetry as it has a different schema | where toint(split(tostring(parse_json(Properties).["context.default.vs.core.telemetryapi.productversion"]), '.')[2]) > 28 | where tostring(parse_json(Properties).["reserved.datamodel.action.result"]) == "Success" | sort by UserId | project UserId,command=parse_json(Properties)["context.default.azurecli.rawcommand"], params = parse_json(Properties).["context.default.azurecli.params"], os_type = tostring(parse_json(Properties).["context.default.vs.core.os.type"]), source = tostring(parse_json(Properties).["context.default.azurecli.source"]), start_time_str= todatetime(parse_json(Properties).["context.default.azurecli.starttime"]), end_time_str = todatetime(parse_json(Properties).["context.default.azurecli.endtime"]) | take 500 ''' client = KustoClient(kcsb) response = client.execute("AzureCli", query) dataset = {} for row in response.primary_results[0]: uid, command, params, os_type, source, begin, end = row params = sort_params(params) if uid not in dataset: dataset[uid] = [(command, params, os_type, source, begin, end)] else: last_cmd, last_params, *_ = dataset[uid][-1] last_params = sort_params(last_params) if last_cmd == command and last_params == params: continue # duplicate dataset[uid].append( (command, params, os_type, source, begin, end)) return dataset
class PythonAdxOutput(IOutput): """ An output component that ingests events to Azure Data Explorer (ADX) using queued ingestion. ... Attributes ---------- cluster : str Azure Data Explorer (ADX) cluster address. eg, 'CDOC.kusto.windows.net' database : str Azure Data Explorer (ADX) database name. eg, 'TestDb' table : str Azure Data Explorer (ADX) table name. eg, 'OutputTable' clientId : str Azure Data Explorer (ADX) client Id that has permissions to access ADX. clientSecret : str Azure Data Explorer (ADX) access key. Used along with client Id. authority : str Azure Data Explorer (ADX) authority. Optional. When not specified, 'microsoft.com' is used. resetTable : bool Default is False. If True, the existing data in the destination table is dropped before new data is logged. """ __namespace__ = "KqlPython" def __init__(self, cluster, database, table, clientId, clientSecret, authority="microsoft.com", resetTable=False): """ Parameters ---------- cluster : str Azure Data Explorer (ADX) cluster address. eg, 'CDOC.kusto.windows.net' database : str Azure Data Explorer (ADX) database name. eg, 'TestDb' table : str Azure Data Explorer (ADX) table name. eg, 'OutputTable' clientId : str Azure Data Explorer (ADX) client Id that has permissions to access ADX. clientSecret : str Azure Data Explorer (ADX) access key. Used along with client Id. authority : str Azure Data Explorer (ADX) authority. Optional. When not specified, 'microsoft.com' is used. resetTable : bool Default is False. If True, the existing data in the destination table is dropped before new data is logged. """ self.running = True self.batchSize = 10000 self.flushDuration = timedelta(milliseconds = 1000) self.lastUploadTime = datetime.utcnow() self.initTable = False self.nextBatch = list() self.currentBatch = None self.lock = threading.Lock() self.resetTable = resetTable self.database = database self.table = table self.kcsbData = KustoConnectionStringBuilder.with_aad_application_key_authentication(f"https://{cluster}:443/", clientId, clientSecret, authority) self.kcsbIngest = KustoConnectionStringBuilder.with_aad_application_key_authentication(f"https://ingest-{cluster}:443/", clientId, clientSecret, authority) self.dataClient = KustoClient(self.kcsbData) self.ingestClient = QueuedIngestClient(self.kcsbIngest) self.ingestionProps = IngestionProperties(database=database, table=table,) def KqlOutputAction(self,kqlOutput: KqlOutput): """Outputs events that have been processed by a KQL query""" self.OutputAction(kqlOutput.Output) def OutputAction(self,dictOutput: Dictionary): """Outputs events either to console or to custom function""" try: if self.running: # Convert C# Dictionary to Python dict txt = JsonConvert.SerializeObject(dictOutput) newEvent = json.loads(txt) # Initialize table if not self.initTable: self.CreateOrResetTable(newEvent) self.initTable = True # Check if it's time to upload a batch if (len(self.nextBatch) >= self.batchSize) or (datetime.utcnow() > self.lastUploadTime + self.flushDuration): self.UploadBatch() self.nextBatch.append(newEvent) except: self.running = False print(sys.exc_info()) print(traceback.print_exc()) def OutputError(self,error): """Outputs errors to console""" self.running = False print(error) def OutputCompleted(self): """Signals the end of the input event stream""" if self.running: self.UploadBatch() self.running = False def Stop(self): """Signals end of program""" print('\nCompleted!') print('\nThank you for using Real-time KQL!') def UploadBatch(self): """Ingests batch of events to Kusto using queued ingestion""" self.lock.acquire() try: if self.currentBatch != None: raise Exception('Upload must not be called before the batch currently being uploaded is completed') self.currentBatch = self.nextBatch self.nextBatch = list() if len(self.currentBatch) > 0: df = DataFrame(self.currentBatch) self.ingestClient.ingest_from_dataframe(df, ingestion_properties=self.ingestionProps) print(f"{len(self.currentBatch)},", end = " ") self.currentBatch = None self.lastUploadTime = datetime.utcnow() except: self.running = False print(sys.exc_info()) print(traceback.print_exc()) finally: self.lock.release() def CreateOrResetTable(self,data): """Creates or resets ADX table""" if self.resetTable: # Dropping table self.dataClient.execute(self.database, f".drop table {self.table} ifexists") # Create-merge table tableMapping = "(" for item in data: tableMapping += f"{item}: {self.GetColumnType(data[item])}, " tableMapping = tableMapping[:-2] + ")" createMergeTableCommand = f".create-merge table {self.table} " + tableMapping self.dataClient.execute(self.database, createMergeTableCommand) def GetColumnType(self,item): """Returns Kusto data type string equivalent of python object""" if isinstance(item, str): return "string" elif isinstance(item, bool): return "bool" elif isinstance(item, datetime): return "datetime" elif isinstance(item, timedelta): return "timespan" elif isinstance(item, (int, bytes, bytearray)): return "int" elif isinstance(item, float): return "real" else: return "dynamic"
def cleanup_all_db_table_records(kusto_client: KustoClient, database: str) -> None: query_filter = '.drop extents from all tables' print(f"query on database: {database} by query_filter: {query_filter}") kusto_client.execute(database, query_filter)
# Please note that if you choose this option, you'll need to authenticate for every new instance that is initialized. # It is highly recommended to create one instance and use it for all of your queries. kcsb = KustoConnectionStringBuilder.with_aad_device_authentication(cluster) # The authentication method will be taken from the chosen KustoConnectionStringBuilder. client = KustoClient(kcsb) ###################################################### ## QUERY ## ###################################################### # once authenticated, usage is as following db = "Samples" query = "StormEvents | take 10" response = client.execute(db, query) # iterating over rows is possible for row in response.primary_results[0]: # printing specific columns by index print("value at 0 {}".format(row[0])) print("\n") # printing specific columns by name print("EventType:{}".format(row["EventType"])) # tables are serializeable, so: with open("results.json", "w+") as f: f.write(str(response.primary_results[0])) # we also support dataframes: dataframe = dataframe_from_result_table(response.primary_results[0])
from azure.kusto.data import KustoClient, KustoConnectionStringBuilder from azure.kusto.data.exceptions import KustoServiceError from azure.kusto.data.helpers import dataframe_from_result_table import pandas as pd AAD_TENANT_ID = "72f988bf-86f1-41af-91ab-2d7cd011db47" KUSTO_CLUSTER = "https://help.kusto.windows.net/" KUSTO_DATABASE = "Samples" KCSB = KustoConnectionStringBuilder.with_aad_device_authentication( KUSTO_CLUSTER) KCSB.authority_id = AAD_TENANT_ID KUSTO_CLIENT = KustoClient(KCSB) KUSTO_QUERY = "StormEvents | project StartTime,State, EventType| sort by StartTime desc | take 10" try: RESPONSE = KUSTO_CLIENT.execute(KUSTO_DATABASE, KUSTO_QUERY) except KustoServiceError as error: print("Error : ", error) #print(RESPONSE.primary_results[0]) df = dataframe_from_result_table(RESPONSE.primary_results[0]) jsonData = df.to_json() print(jsonData)
from azure.kusto.data import KustoClient, KustoConnectionStringBuilder cluster = "https://mkadxcluster.eastus.kusto.windows.net" kcsb = KustoConnectionStringBuilder.with_az_cli_authentication(cluster) client = KustoClient(kcsb) db = "TestDatabase" query = "TestTable | take 3" response = client.execute(db, query) print(response) for row in response.primary_results[0]: print(f'{row}')
mappings.append( JsonColumnMapping(columnName="xtextWithNulls", jsonPath="$.xtextWithNulls", cslDataType="string")) mappings.append( JsonColumnMapping(columnName="xdynamicWithNulls", jsonPath="$.xdynamicWithNulls", cslDataType="dynamic")) return mappings KUSTO_CLIENT = KustoClient("https://toshetah.kusto.windows.net") KUSTO_INGEST_CLIENT = KustoIngestClient( "https://ingest-toshetah.kusto.windows.net") KUSTO_CLIENT.execute("PythonTest", ".drop table Deft ifexists") # Sanity test - ingest from csv to a non-existing table CSV_INGESTION_PROPERTIES = IngestionProperties( "PythonTest", "Deft", dataFormat=DataFormat.csv, mapping=Helpers.create_deft_table_csv_mappings()) CSV_FILE_PATH = os.path.join(os.getcwd(), "azure-kusto-ingest", "tests", "input", "dataset.csv") ZIPPED_CSV_FILE_PATH = os.path.join(os.getcwd(), "azure-kusto-ingest", "tests", "input", "dataset.csv.gz") KUSTO_INGEST_CLIENT.ingest_from_multiple_files( [CSV_FILE_PATH, ZIPPED_CSV_FILE_PATH], False, CSV_INGESTION_PROPERTIES) time.sleep(60)
start_time = datetime.datetime.now(datetime.timezone.utc) clean_previous_tests(client, test_db, test_table) input_folder_path = get_file_path() csv_file_path = os.path.join(input_folder_path, "dataset.csv") tsv_file_path = os.path.join(input_folder_path, "dataset.tsv") zipped_csv_file_path = os.path.join(input_folder_path, "dataset.csv.gz") json_file_path = os.path.join(input_folder_path, "dataset.json") zipped_json_file_path = os.path.join(input_folder_path, "dataset.jsonz.gz") current_count = 0 client.execute( test_db, ".create table {0} (rownumber: int, rowguid: string, xdouble: real, xfloat: real, xbool: bool, xint16: int, xint32: int, xint64: long, xuint8: long, xuint16: long, xuint32: long, xuint64: long, xdate: datetime, xsmalltext: string, xtext: string, xnumberAsText: string, xtime: timespan, xtextWithNulls: string, xdynamicWithNulls: dynamic)" .format(test_table), ) client.execute( test_db, ".create table {0} ingestion json mapping 'JsonMapping' {1}".format( test_table, TestData.test_table_json_mapping_reference())) # assertions def assert_rows_added(expected: int, timeout=60): global current_count actual = 0 while timeout > 0: time.sleep(1)
KUSTO_CLUSTER = "https://help.kusto.windows.net" # In case you want to authenticate with AAD application. CLIENT_ID = "<insert here your AAD application id>" CLIENT_SECRET = "<insert here your AAD application key>" KUSTO_CLIENT = KustoClient(kusto_cluster=KUSTO_CLUSTER, client_id=CLIENT_ID, client_secret=CLIENT_SECRET) # In case you want to authenticate with the logged in AAD user. KUSTO_CLIENT = KustoClient(kusto_cluster=KUSTO_CLUSTER) KUSTO_DATABASE = "Samples" KUSTO_QUERY = "StormEvents | take 10" RESPONSE = KUSTO_CLIENT.execute(KUSTO_DATABASE, KUSTO_QUERY) for row in RESPONSE.iter_all(): print(row[0], " ", row["EventType"]) # Query is too big to be executed KUSTO_QUERY = "StormEvents" try: RESPONSE = KUSTO_CLIENT.execute(KUSTO_DATABASE, KUSTO_QUERY) except KustoServiceError as error: print("2. Error:", error) print("2. Is semantic error:", error.is_semantic_error()) print("2. Has partial results:", error.has_partial_results()) print("2. Result size:", len(list(error.get_partial_results().iter_all()))) RESPONSE = KUSTO_CLIENT.execute(KUSTO_DATABASE, KUSTO_QUERY,