def ingest_to_ADX(filepath, filesize):
    """Queue one CSV blob for ingestion into Azure Data Explorer.

    Parameters
    ----------
    filepath : str
        Path of the blob inside ``SOURCE_CSV_CONTAINER``; it is concatenated
        into the blob URL as-is.
    filesize : int
        Size handed to ``BlobDescriptor`` (the original inline comment calls
        it the raw size of the data in bytes).

    Notes
    -----
    The call only queues the ingestion; nothing here waits for it to finish.
    The original also built an engine-endpoint connection builder from ``URI``
    that was never used; it has been removed.
    """
    kcsb_ingest = KustoConnectionStringBuilder.with_aad_device_authentication(
        DATA_INGESTION_URI)
    kcsb_ingest.authority_id = AAD_TENANT_ID
    ingestion_client = KustoIngestClient(kcsb_ingest)

    ingestion_properties = IngestionProperties(
        database=DATABASE,
        table=DESTINATION_TABLE,
        dataFormat=DataFormat.CSV,
        mappingReference=DESTINATION_TABLE_COLUMN_MAPPING,
        # The first record of the CSV is skipped (header row, presumably).
        additionalProperties={'ignoreFirstRecord': 'true'},
        reportLevel=ReportLevel.FailuresAndSuccesses)

    # SOURCE_CSV_BLOB_TOKEN is appended verbatim after the blob path.
    blob_path = ("https://" + SOURCE_CSV_BLOB_ACCOUNT +
                 ".blob.core.windows.net/" + SOURCE_CSV_CONTAINER + "/" +
                 filepath + SOURCE_CSV_BLOB_TOKEN)
    blob_descriptor = BlobDescriptor(blob_path, filesize)

    ingestion_client.ingest_from_blob(
        blob_descriptor, ingestion_properties=ingestion_properties)
    print('Done queuing up ingestion with Azure Data Explorer ' + filepath)
def Ingest(Tag,
           file_path=r"D:\test\Results\log_data_merge\merge_microsoftdocs_sql-docs-pr.txt",
           file_size=3333):
    r"""Recreate the RepoContributors table and queue a local file for ingestion.

    Steps, in order: drop the table if it exists, create it, create its CSV
    ingestion mapping, then queue ``file_path`` for ingestion through the
    ingest endpoint.

    Parameters
    ----------
    Tag : str
        Used both as the ``ingestByTags`` and the ``dropByTags`` value.
    file_path : str, optional
        Local file to ingest. Defaults to the path the original hard-coded.
    file_size : int, optional
        Size passed to ``FileDescriptor`` (raw size of the data in bytes, per
        the original comment). Defaults to the original hard-coded 3333.

    Returns
    -------
    int
        Always ``1``.
    """
    # NOTE(review): the original defined
    # AUTHORITY_ID = "6babcaad-604b-40ac-a9d7-9fd97c0b779f" but never applied
    # it to either connection builder. Confirm whether authority_id should be
    # set on them.
    INGESTCLUSTER = "https://ingest-cgadataout.kusto.windows.net"
    KUSTOCLUSTER = "https://cgadataout.kusto.windows.net"
    DATABASE = "DevRelWorkArea"
    DESTINATION_TABLE = "RepoContributors"
    DESTINATION_TABLE_COLUMN_MAPPING = "RepoContributors_CSV_Mapping"

    # Create table
    kusto_client = KustoClient(
        KustoConnectionStringBuilder.with_aad_device_authentication(
            KUSTOCLUSTER))
    kusto_client.execute_mgmt(
        DATABASE, ".drop table RepoContributors ifexists")
    kusto_client.execute_mgmt(
        DATABASE,
        ".create table RepoContributors (Article: string, Contributors: int64, Data: string)")
    print("RepoContributors table is created")

    # Create mapping
    CREATE_MAPPING_COMMAND = """.create table RepoContributors ingestion csv mapping 'RepoContributors_CSV_Mapping' '[{"Name": "Article","datatype": "string","Ordinal": 0},{"Name": "Contributors","datatype": "int64","Ordinal": 1},{"Name": "Data","datatype": "string","Ordinal": 2}]'"""
    kusto_client.execute_mgmt(DATABASE, CREATE_MAPPING_COMMAND)
    print("mapping is created")

    # Ingest
    # The authentication method will be taken from the chosen
    # KustoConnectionStringBuilder.
    ingestion_props = IngestionProperties(
        database=DATABASE,
        table=DESTINATION_TABLE,
        dataFormat=DataFormat.CSV,
        ingestByTags=[Tag],
        dropByTags=[Tag],
        mappingReference=DESTINATION_TABLE_COLUMN_MAPPING,
        reportLevel=ReportLevel.FailuresAndSuccesses,
        additionalProperties={'ignoreFirstRecord': 'true'})
    client = KustoIngestClient(
        KustoConnectionStringBuilder.with_aad_device_authentication(
            INGESTCLUSTER))

    # ingest from file
    file_descriptor = FileDescriptor(file_path, file_size)
    client.ingest_from_file(file_descriptor,
                            ingestion_properties=ingestion_props)
    return 1
def ingest_to_ADX(filepath, telemetry_block_blob_service, container_name, blob_account, file_size, tc,vm_uuid,deploy_uuid,config_uuid):
    """Queue a JSON blob for ADX ingestion and record the request.

    A fresh UUID is generated as the ingestion source id, the blob is queued
    with an inline JSON column mapping, and the request is then logged through
    ``save_COSMOS_log`` and the telemetry client ``tc``.

    ``telemetry_block_blob_service`` is accepted but not used in this body.
    min/max datetime are recorded as 0 and the record count as 1 (fixed
    placeholder values in this variant).
    """
    source_id = str(uuid.uuid4())

    connection = KustoConnectionStringBuilder.with_aad_device_authentication(
        DATA_INGESTION_URI)
    connection.authority_id = APP_AAD_TENANT_ID
    ingest_client = KustoIngestClient(connection)

    # (column, JSON path, type) triples; "rawdata" captures the whole document.
    column_specs = (
        ("vm_uuid", "$.vm_uuid", "string"),
        ("deploy_uuid", "$.deployment_description[0].deploy_uuid", "string"),
        ("config_uuid", "$.vm_configuration[0].config_uuid", "string"),
        ("rawdata", "$", "dynamic"),
    )
    json_mapping = [
        JsonColumnMapping(column, path, kind)
        for column, path, kind in column_specs
    ]
    properties = IngestionProperties(
        database=DATABASE,
        table=DESTINATION_TABLE,
        dataFormat=DataFormat.JSON,
        ingestionMapping=json_mapping,
        reportLevel=ReportLevel.FailuresAndSuccesses,
        flushImmediately=IS_FLUSH_IMMEDIATELY)
    print("Database {} Tabele {}".format(DATABASE, DESTINATION_TABLE))

    # CLEAN_FILE_TOKEN is appended verbatim after the blob path.
    blob_url = "".join([
        "https://", blob_account, ".blob.core.windows.net/",
        container_name, "/", filepath, CLEAN_FILE_TOKEN,
    ])
    print(blob_url, ' ', str(file_size), source_id)

    ingest_client.ingest_from_blob(
        BlobDescriptor(blob_url, file_size, source_id),
        ingestion_properties=properties)

    tc.context.properties["ingest_source_id"] = source_id

    earliest, latest, record_count = 0, 0, 1
    doc_id = save_COSMOS_log(vm_uuid, deploy_uuid, config_uuid, filepath,
                             earliest, latest, record_count, source_id,
                             blob_account, container_name, tc)

    event_properties = {
        'FILE_PATH': filepath,
        'DOC_ID': doc_id,
        "SOURCE_ID": source_id,
    }
    event_measurements = {
        'TOTOAL_RECORDS': record_count,
        'FILE_SIZE': file_size,
        'MIN_DATETIME': earliest,
        'MAX_DATETIME': latest,
    }
    tc.track_event(APP_INSIGHT_INGEST_EVENT_NAME, event_properties,
                   event_measurements)

    log_msg = "{} Done queuing up ingestion with Azure Data Explorer {}, Ingest SourceID {}".format(
        LOG_MESSAGE_HEADER, filepath, source_id)
    print(log_msg)
    tc.track_trace(log_msg)
    tc.flush()
def ingest_to_ADX(filepath, telemetry_block_blob_service, container_name, blob_account, tc):
    """Queue a CSV blob for ADX ingestion and record the request.

    The ids, file size, datetime range and record count come from
    ``get_uuids_from_csv``. The three ids are joined with ``_`` to form the
    drop-by tag. After queuing, the request is logged through
    ``save_COSMOS_log`` and the telemetry client ``tc``.

    NOTE(review): the blob URL is built from the SOURCE_OSMETRICS_* constants,
    not from ``blob_account`` / ``container_name``, which are only forwarded
    to ``get_uuids_from_csv`` and ``save_COSMOS_log``. Confirm this is
    intended.
    """
    source_id = str(uuid.uuid4())

    connection = KustoConnectionStringBuilder.with_aad_device_authentication(
        DATA_INGESTION_URI)
    connection.authority_id = APP_AAD_TENANT_ID

    csv_facts = get_uuids_from_csv(telemetry_block_blob_service,
                                   container_name, filepath)
    (vm_uuid, config_uuid, deploy_uuid, file_size,
     min_datatime, max_datatime, total_records) = csv_facts
    drop_tag = '_'.join((vm_uuid, config_uuid, deploy_uuid))

    ingest_client = KustoIngestClient(connection)
    extra_options = {
        'ignoreFirstRecord': 'true',
        'reportMethod': 'QueueAndTable',
    }
    properties = IngestionProperties(
        database=DATABASE,
        table=DESTINATION_TABLE,
        dataFormat=DataFormat.CSV,
        mappingReference=DESTINATION_TABLE_COLUMN_MAPPING,
        additionalProperties=extra_options,
        reportLevel=ReportLevel.FailuresAndSuccesses,
        dropByTags=[drop_tag],
        flushImmediately=IS_FLUSH_IMMEDIATELY)

    blob_url = "".join([
        "https://", SOURCE_OSMETRICS_BLOB_ACCOUNT, ".blob.core.windows.net/",
        SOURCE_OSMETRICS_CONTAINER, "/", filepath,
        SOURCE_OSMETRICS_FILE_TOKEN,
    ])
    ingest_client.ingest_from_blob(
        BlobDescriptor(blob_url, file_size, source_id),
        ingestion_properties=properties)

    tc.context.properties["ingest_source_id"] = str(source_id)
    doc_id = save_COSMOS_log(vm_uuid, deploy_uuid, config_uuid, filepath,
                             min_datatime, max_datatime, total_records,
                             source_id, blob_account, container_name, tc)

    event_properties = {
        'FILE_PATH': filepath,
        'DOC_ID': doc_id,
        "SOURCE_ID": source_id,
    }
    event_measurements = {
        'TOTOAL_RECORDS': total_records,
        'FILE_SIZE': file_size,
        'MIN_DATETIME': min_datatime,
        'MAX_DATETIME': max_datatime,
    }
    tc.track_event(APP_INSIGHT_INGEST_EVENT_NAME, event_properties,
                   event_measurements)

    log_msg = "{} Done queuing up ingestion with Azure Data Explorer {}, Ingest SourceID {}".format(
        LOG_MESSAGE_HEADER, filepath, source_id)
    print(log_msg)
    tc.track_trace(log_msg)
    tc.flush()
def __init__(
    self,
    kusto_cluster,
    client_id=None,
    client_secret=None,
    username=None,
    password=None,
    certificate=None,
    certificate_thumbprint=None,
    authority=None,
):
    """
    Kusto Client constructor.

    The authentication flow is chosen from whichever credentials are
    supplied, checked in this order: username + password, client id +
    secret, client id + certificate + thumbprint, otherwise AAD device
    authentication.

    Parameters
    ----------
    kusto_cluster : str
        Kusto cluster endpoint. Example: https://help.kusto.windows.net
    client_id : str
        The AAD application ID of the application making the request.
    client_secret : str
        The AAD application key. If given, username/password should not be.
    username : str
        The user making the request. If given, password must follow and
        client_secret should not be given.
    password : str
        The password matching the username.
    certificate : str
        Application certificate, used together with client_id and
        certificate_thumbprint.
    certificate_thumbprint : str
        Thumbprint of the certificate.
    authority : str, optional
        When truthy, it is assigned to the builder's ``authority_id``.
    """
    builder = KustoConnectionStringBuilder
    if username and password:
        kcsb = builder.with_aad_user_password_authentication(
            kusto_cluster, username, password)
    elif client_id and client_secret:
        kcsb = builder.with_aad_application_key_authentication(
            kusto_cluster, client_id, client_secret)
    elif client_id and certificate and certificate_thumbprint:
        kcsb = builder.with_aad_application_certificate_authentication(
            kusto_cluster, client_id, certificate, certificate_thumbprint)
    else:
        kcsb = builder.with_aad_device_authentication(kusto_cluster)

    if authority:
        kcsb.authority_id = authority

    self.client = KustoClient(kcsb)
    # replace aadhelper to use remote browser in interactive mode
    self.client._aad_helper = _MyAadHelper(kcsb)

    def _endpoint_version(endpoint):
        # Both endpoints are tested against the same "v2/rest/query" suffix.
        if endpoint.endswith("v2/rest/query"):
            return "v2"
        return "v1"

    self.mgmt_endpoint_version = _endpoint_version(self.client._mgmt_endpoint)
    self.query_endpoint_version = _endpoint_version(
        self.client._query_endpoint)
def test_conn_method_aad_device(self, mock_init):
    """Check that the AAD_DEVICE auth method builds a device-auth connection.

    The original asserted ``mock_init.called_with(...)``. ``called_with`` is
    not a Mock assertion: attribute access creates a child mock, and calling
    it returns another (truthy) mock, so the test could never fail. The
    recorded call is inspected explicitly instead.
    """
    mock_init.return_value = None
    db.merge_conn(
        Connection(conn_id=ADX_TEST_CONN_ID,
                   conn_type='azure_data_explorer',
                   host='https://help.kusto.windows.net',
                   extra=json.dumps({'auth_method': 'AAD_DEVICE'})))
    AzureDataExplorerHook(azure_data_explorer_conn_id=ADX_TEST_CONN_ID)

    assert mock_init.called
    args, kwargs = mock_init.call_args
    # Locate the builder by type so the check does not depend on whether the
    # patched initializer also receives ``self``.
    passed_builders = [
        value for value in list(args) + list(kwargs.values())
        if isinstance(value, KustoConnectionStringBuilder)
    ]
    assert passed_builders, 'KustoClient was not given a connection string builder'

    expected = KustoConnectionStringBuilder.with_aad_device_authentication(
        'https://help.kusto.windows.net')
    # Compare string forms; two separately built builders are not assumed to
    # compare equal as objects.
    assert str(passed_builders[0]) == str(expected)
def get_conn(self) -> KustoClient:
    """Return a KustoClient object.

    The cluster comes from the connection's ``host``. The authentication
    method and its parameters are read from the connection's extra JSON,
    under either the short key (e.g. ``tenant``) or the prefixed key
    (``extra__azure_data_explorer__tenant``).

    Raises
    ------
    AirflowException
        If the host is missing, a required extra parameter is missing under
        both key spellings, or the authentication method is unknown.
    """
    conn = self.get_connection(self.conn_id)
    cluster = conn.host
    if not cluster:
        raise AirflowException('Host connection option is required')

    def get_required_param(name: str) -> str:
        """Extract required parameter from extra JSON, raise exception if not found"""
        extras = conn.extra_dejson
        # The original wrote `get_required_param(a) or get_required_param(b)`,
        # but the first call raised when `a` was missing, so the prefixed
        # fallback was never reached. Both spellings are tried before raising.
        value = extras.get(name) or extras.get(
            f'extra__azure_data_explorer__{name}')
        if not value:
            raise AirflowException(
                f'Extra connection option is missing required parameter: `{name}`'
            )
        return value

    auth_method = get_required_param('auth_method')

    if auth_method == 'AAD_APP':
        tenant = get_required_param('tenant')
        kcsb = KustoConnectionStringBuilder.with_aad_application_key_authentication(
            cluster, conn.login, conn.password, tenant)
    elif auth_method == 'AAD_APP_CERT':
        certificate = get_required_param('certificate')
        thumbprint = get_required_param('thumbprint')
        tenant = get_required_param('tenant')
        kcsb = KustoConnectionStringBuilder.with_aad_application_certificate_authentication(
            cluster,
            conn.login,
            certificate,
            thumbprint,
            tenant,
        )
    elif auth_method == 'AAD_CREDS':
        tenant = get_required_param('tenant')
        kcsb = KustoConnectionStringBuilder.with_aad_user_password_authentication(
            cluster, conn.login, conn.password, tenant)
    elif auth_method == 'AAD_DEVICE':
        kcsb = KustoConnectionStringBuilder.with_aad_device_authentication(
            cluster)
    else:
        raise AirflowException(
            f'Unknown authentication method: {auth_method}')

    return KustoClient(kcsb)
def test_aad_device_login(self):
    """Checks kcsb that is created with AAD device login."""
    expected_text = "Data Source=localhost;AAD Federated Security=True;Authority Id=common"

    kcsb = KustoConnectionStringBuilder.with_aad_device_authentication("localhost")

    assert kcsb.data_source == "localhost"
    assert kcsb.aad_federated_security
    # No user or application credentials may be populated by device login.
    assert kcsb.aad_user_id is None
    assert kcsb.password is None
    assert kcsb.application_client_id is None
    assert kcsb.application_key is None
    assert kcsb.authority_id == "common"
    # Both textual forms must render the same connection string.
    assert repr(kcsb) == expected_text
    assert str(kcsb) == expected_text
def test_no_credentials(self):
    """Checks kcsb that is created with no credentials"""
    connection_strings = (
        "localhost",
        "data Source=localhost",
        "Addr=localhost",
        "Addr = localhost",
    )
    kcsbs = [KustoConnectionStringBuilder(text) for text in connection_strings]
    # The device-authentication factory is validated the same way.
    kcsbs.append(
        KustoConnectionStringBuilder.with_aad_device_authentication(
            "localhost"))

    for kcsb in kcsbs:
        self._validate_kcsb_without_credentials(kcsb, "localhost")
def get_client(cluster):
    """
    get cached, authenticated client for given cluster
    """
    cached = _client_cache.get(cluster)
    if cached is not None:
        return cached

    kcsb = KustoConnectionStringBuilder.with_aad_device_authentication(cluster)
    fresh = KustoClient(kcsb)
    # A trivial query is run before the client is cached, so a client is only
    # stored once it has completed a request (presumably this forces the
    # sign-in up front; confirm).
    fresh.execute('VSO', 'print "a" | take 0')
    _client_cache[cluster] = fresh
    return fresh
def __init__(self, conn_kv):
    """
    Kusto Client constructor.

    Parameters
    ----------
    conn_kv : mapping
        Connection settings. Keys read by this constructor:

        cluster : required; formatted into https://<cluster>.kusto.windows.net
        username, password : if both are set, user/password authentication
        clientid, clientsecret : if both are set, application key
            authentication
        clientid, certificate, certificate_thumbprint : if all are set,
            application certificate authentication
        tenant : optional authority; applied to whichever flow is chosen

        If none of the credential combinations is complete, AAD device
        authentication is used.
    """
    kusto_cluster = "https://{0}.kusto.windows.net".format(conn_kv["cluster"])
    tenant = conn_kv.get("tenant")

    if all([conn_kv.get("username"), conn_kv.get("password")]):
        kcsb = KustoConnectionStringBuilder.with_aad_user_password_authentication(
            kusto_cluster, conn_kv.get("username"), conn_kv.get("password"))
        if tenant is not None:
            kcsb.authority_id = tenant
    elif all([conn_kv.get("clientid"), conn_kv.get("clientsecret")]):
        kcsb = KustoConnectionStringBuilder.with_aad_application_key_authentication(
            kusto_cluster, conn_kv.get("clientid"),
            conn_kv.get("clientsecret"), tenant)
    elif all([conn_kv.get("clientid"), conn_kv.get("certificate"),
              conn_kv.get("certificate_thumbprint")]):
        # The original passed the tenant as the *default* of
        # conn_kv.get("certificate_thumbprint", ...), so the authority never
        # reached the builder. It is now passed as its own argument, the same
        # way the application-key branch does.
        kcsb = KustoConnectionStringBuilder.with_aad_application_certificate_authentication(
            kusto_cluster,
            conn_kv.get("clientid"),
            conn_kv.get("certificate"),
            conn_kv.get("certificate_thumbprint"),
            tenant,
        )
    else:
        kcsb = KustoConnectionStringBuilder.with_aad_device_authentication(
            kusto_cluster)
        if tenant is not None:
            kcsb.authority_id = tenant

    self.client = KustoClient(kcsb)
    # replace aadhelper to use remote browser in interactive mode
    self.client._aad_helper = _MyAadHelper(kcsb, self._DEFAULT_CLIENTID)
    self.mgmt_endpoint_version = "v2" if self.client._mgmt_endpoint.endswith("v2/rest/query") else "v1"
    self.query_endpoint_version = "v2" if self.client._query_endpoint.endswith("v2/rest/query") else "v1"
def test_aad_device_login(self):
    """Checks kcsb that is created with AAD device login."""
    expected_text = "Data Source=localhost;AAD Federated Security=True;Authority Id=common"

    kcsb = KustoConnectionStringBuilder.with_aad_device_authentication(
        "localhost")

    self.assertEqual(kcsb.data_source, "localhost")
    self.assertTrue(kcsb.aad_federated_security)
    # No user or application credentials may be populated by device login.
    self.assertIsNone(kcsb.aad_user_id)
    self.assertIsNone(kcsb.password)
    self.assertIsNone(kcsb.application_client_id)
    self.assertIsNone(kcsb.application_key)
    self.assertEqual(kcsb.authority_id, "common")
    # Both textual forms must render the same connection string.
    self.assertEqual(repr(kcsb), expected_text)
    self.assertEqual(str(kcsb), expected_text)
def get_conn(self) -> KustoClient:
    """Return a KustoClient object.

    The cluster is the connection's ``host``; the authentication method and
    its parameters are read from the connection's extra JSON. Raises
    AirflowException when the host or a required extra parameter is missing,
    or when the authentication method is not recognised.
    """
    connection = self.get_connection(self.conn_id)
    cluster = connection.host
    if not cluster:
        raise AirflowException('Host connection option is required')

    def get_required_param(name):
        """Extract required parameter from extra JSON, raise exception if not found"""
        found = connection.extra_dejson.get(name)
        if found:
            return found
        raise AirflowException(
            'Extra connection option is missing required parameter: `{}`'
            .format(name))

    builder = KustoConnectionStringBuilder
    auth_method = get_required_param('auth_method')

    if auth_method == 'AAD_APP':
        tenant = get_required_param('tenant')
        kcsb = builder.with_aad_application_key_authentication(
            cluster, connection.login, connection.password, tenant)
    elif auth_method == 'AAD_APP_CERT':
        # Looked up in the same order as before, so the same parameter is
        # reported first when several are missing.
        certificate = get_required_param('certificate')
        thumbprint = get_required_param('thumbprint')
        tenant = get_required_param('tenant')
        kcsb = builder.with_aad_application_certificate_authentication(
            cluster, connection.login, certificate, thumbprint, tenant)
    elif auth_method == 'AAD_CREDS':
        tenant = get_required_param('tenant')
        kcsb = builder.with_aad_user_password_authentication(
            cluster, connection.login, connection.password, tenant)
    elif auth_method == 'AAD_DEVICE':
        kcsb = builder.with_aad_device_authentication(cluster)
    else:
        raise AirflowException(
            'Unknown authentication method: {}'.format(auth_method))

    return KustoClient(kcsb)
def authenticate_kusto(kusto_cluster,
                       tenant_id='72f988bf-86f1-41af-91ab-2d7cd011db47'):
    """Build a KustoClient that uses AAD device authentication.

    Parameters
    ----------
    kusto_cluster : str
        Cluster endpoint passed to the connection string builder.
    tenant_id : str, optional
        Value assigned to the builder's ``authority_id``. Defaults to the
        tenant id that was previously hard-coded, so existing callers are
        unaffected.

    Returns
    -------
    KustoClient
    """
    kcsb = KustoConnectionStringBuilder.with_aad_device_authentication(
        kusto_cluster)
    kcsb.authority_id = tenant_id
    return KustoClient(kcsb)
mappings.append( JsonColumnMapping(columnName="xtime", jsonPath="$.xtime", cslDataType="timespan")) mappings.append( JsonColumnMapping(columnName="xtextWithNulls", jsonPath="$.xtextWithNulls", cslDataType="string")) mappings.append( JsonColumnMapping(columnName="xdynamicWithNulls", jsonPath="$.xdynamicWithNulls", cslDataType="dynamic")) return mappings engine_kcsb = KustoConnectionStringBuilder.with_aad_device_authentication( "https://toshetah.kusto.windows.net") dm_kcsb = KustoConnectionStringBuilder.with_aad_device_authentication( "https://ingest-toshetah.kusto.windows.net") client = KustoClient(engine_kcsb) ingest_client = KustoIngestClient(dm_kcsb) ingest_status_q = KustoIngestStatusQueues(ingest_client) client.execute("PythonTest", ".drop table Deft ifexists") @pytest.mark.run(order=1) def test_csv_ingest_non_existing_table(): csv_ingest_props = IngestionProperties( "PythonTest", "Deft", dataFormat=DataFormat.csv, mapping=Helpers.create_deft_table_csv_mappings(),
mappings.append( JsonColumnMapping(columnName="xtextWithNulls", jsonPath="$.xtextWithNulls", cslDataType="string")) mappings.append( JsonColumnMapping(columnName="xdynamicWithNulls", jsonPath="$.xdynamicWithNulls", cslDataType="dynamic")) return mappings cluster = "Dadubovs1.westus" # "toshetah" db_name = "TestingDatabase" # "PythonTest" table_name = "Deft" engine_kcsb = KustoConnectionStringBuilder.with_aad_device_authentication( "https://{}.kusto.windows.net".format(cluster)) dm_kcsb = KustoConnectionStringBuilder.with_aad_device_authentication( "https://ingest-{}.kusto.windows.net".format(cluster)) client = KustoClient(engine_kcsb) ingest_client = KustoIngestClient(dm_kcsb) ingest_status_q = KustoIngestStatusQueues(ingest_client) client.execute(db_name, ".drop table {} ifexists".format(table_name)) @pytest.mark.run(order=1) def test_csv_ingest_non_existing_table(): csv_ingest_props = IngestionProperties( db_name, table_name, dataFormat=DataFormat.csv,
FileDescriptor, BlobDescriptor, DataFormat, ReportLevel, ) # there are a lot of useful properties, make sure to go over docs and check them out ingestion_props = IngestionProperties( database="{database_name}", table="{table_name}", dataFormat=DataFormat.csv, # incase status update for success are also required # reportLevel=ReportLevel.FailuresAndSuccesses, ) client = KustoIngestClient( KustoConnectionStringBuilder.with_aad_device_authentication( "https://ingest-{cluster_name}.kusto.windows.net")) # there are more options for authenticating - see azure-kusto-data samples ################################################################## ## INGESTION ## ################################################################## # ingest from file file_descriptor = FileDescriptor( "{filename}.csv", 3333) # 3333 is the raw size of the data in bytes. client.ingest_from_file(file_descriptor, ingestion_properties=ingestion_props) client.ingest_from_file("{filename}.csv", ingestion_properties=ingestion_props) # ingest from blob blob_descriptor = BlobDescriptor(
from azure.kusto.data.request import KustoClient, KustoConnectionStringBuilder from azure.kusto.data.exceptions import KustoServiceError from azure.kusto.data.helpers import dataframe_from_result_table cluster_name = "https://sqlazureweu2.kustomfa.windows.net" kustoStringBuilder = KustoConnectionStringBuilder.with_aad_device_authentication( cluster_name) kusto_client = KustoClient(kustoStringBuilder) query = '''MonDmDbHadrReplicaStates | where TIMESTAMP > datetime(2019-06-16 12:39:52) and TIMESTAMP < datetime(2019-06-16 13:21:05) | where LogicalServerName == "clperftesting-gen5-bc8-loose24-ac-weu-00" | where isnotempty(toguid(logical_database_name)) | where is_primary_replica == 0 and is_local == 1 | project TIMESTAMP, NodeName, redo_queue_size_mb = redo_queue_size /1024., redo_rate, failover_time_sec = redo_queue_size*1.0/redo_rate | order by TIMESTAMP asc | summarize redo_queue_list = make_list(redo_queue_size_mb), time_series = make_list(TIMESTAMP), failover_time_sec_list = make_list(failover_time_sec) by NodeName | extend redo_diff_mb= series_fir(redo_queue_list, dynamic([1,-1]), false, false) | extend redo_stats = series_stats_dynamic(redo_diff_mb) | extend failover_time_smooth_secs = series_fir(failover_time_sec_list, dynamic ([1,1,1]), true, true) //normalize= true, center= true | extend failover_time_stats = series_stats_dynamic(failover_time_smooth_secs) | project redo_stats.avg, failover_time_stats.max''' try: print("querying") failover_response = kusto_client.execute("sqlazure1", query)
def send_kusto_command_from_file(client, database, file_path):
    """
    Reads a Kusto command from a file, and sends the command to Kusto and
    returns the results from the response

    Newlines in the file are replaced with spaces, so the command is sent as
    a single line through ``client.execute_mgmt``.

    Returns the first entry of the response's ``primary_results``.

    Raises ValueError if any argument is missing or blank (``None`` is now
    rejected the same way as an empty string, where it previously raised
    AttributeError).
    """
    if (not client or not database or not database.strip()
            or not file_path or not file_path.strip()):
        raise ValueError("All arguments of this function are mandatory")

    # The context manager closes the file; the original never closed it.
    with open(file_path, 'r') as kusto_file:
        command = kusto_file.read().replace('\n', ' ')

    response = client.execute_mgmt(database, command)
    return response.primary_results[0]


# create a connection string with a device code (=interactive) authentication
kcsb = KustoConnectionStringBuilder.with_aad_device_authentication(cluster)
# create the client and do the actual authentication
client = KustoClient(kcsb)
# run the command to create the target table
response = send_kusto_command_from_file(client, database,
                                        './table-target-create.csl')
print("Created target table. Response = ", response)
# run the command to create the function of data parsing
response = send_kusto_command_from_file(client, database,
                                        './function-source-data-parsing.csl')
print("Created parsing function. Response = ", response)
# run the command to create the update policy to parse the source data into the target table
def update_ADX_ingest_status(tc):
    """Drain the ADX ingestion status queues and record every result.

    Success and failure messages are popped in batches of up to 15. Each
    success is traced, written through ``update_COSMOS_status``, and its
    source file is moved under ``PROCESSED_TELEMETRY_FOLDER``. Each failure
    is traced and written through ``update_COSMOS_status``.

    While both queues are empty the function sleeps with a doubling backoff
    (1, 2, 4 seconds) and returns once the backoff reaches ``MAX_BACKOFF``.

    Parameters
    ----------
    tc
        Telemetry client; ``track_trace``, ``track_event`` and ``flush`` are
        called on it, and it is forwarded to the helper functions.

    Notes
    -----
    Success status messages are only produced when ingestion was queued with
    ``reportLevel=ReportLevel.FailuresAndSuccesses``.
    """
    kcsb_ingest = KustoConnectionStringBuilder.with_aad_device_authentication(
        DATA_INGESTION_URI)
    kcsb_ingest.authority_id = APP_AAD_TENANT_ID
    qs = KustoIngestStatusQueues(KustoIngestClient(kcsb_ingest))

    # Short identifier for this run: the last five characters of a UUID.
    run_id = (str(uuid.uuid4()))[31:].upper()
    MAX_BACKOFF = 8
    backoff = 1
    total_queue_success_messages = 0

    while True:
        if qs.success.is_empty() and qs.failure.is_empty():
            time.sleep(backoff)
            if backoff == 1 and total_queue_success_messages != 0:
                print(
                    "{} RUN_ID:{} Processed {} message in this batch ".format(
                        LOG_MESSAGE_HEADER, run_id,
                        total_queue_success_messages))
            backoff = min(backoff * 2, MAX_BACKOFF)
            if backoff >= MAX_BACKOFF:
                # Queues stayed empty through the whole backoff ladder.
                break
            continue

        backoff = 1
        # The original guarded against None only for the counters and then
        # called len()/iterated unguarded; None is normalised to an empty
        # batch so every later use is safe.
        success_messages = qs.success.pop(15) or []
        failure_messages = qs.failure.pop(15) or []
        total_success = len(success_messages)
        total_failure = len(failure_messages)

        if success_messages:
            tc.track_trace("{} Get {} success ingest messages ".format(
                LOG_MESSAGE_HEADER, str(total_success)))
        if failure_messages:
            tc.track_trace("{} Get {} failure ingest messages ".format(
                LOG_MESSAGE_HEADER, str(total_failure)))
        tc.flush()

        total_queue_success_messages += total_success

        for count_success, smsg in enumerate(success_messages, start=1):
            file_path = get_file_path(smsg.IngestionSourcePath)
            container_name = get_container_name(smsg.IngestionSourcePath)
            vm_uuid = get_vm_uuid_from_filename(file_path)

            log_msg = "{} SUCCESS TO INGEST TO ADX <{}> -[{}/{}/{}] , Time: {}, vm_uuid: {}, source_id:{}, file path: {}".format(
                LOG_MESSAGE_HEADER, run_id, str(count_success),
                str(total_success), str(total_queue_success_messages),
                smsg.SucceededOn, vm_uuid, smsg.IngestionSourceId, file_path)
            tc.track_trace(log_msg)
            tc.track_event(
                APP_INSIGHT_INGEST_SUCCESS_EVENT_NAME, {
                    'MESSAGE': 'SUCCESS TO Ingest ADX',
                    'file_path': file_path,
                    'source_id': smsg.IngestionSourceId
                }, {})
            tc.flush()

            update_COSMOS_status(COSMOS_CLIENT, file_path, smsg.SucceededOn,
                                 SUCCESS_STATUS, str(smsg), vm_uuid,
                                 smsg.IngestionSourceId, container_name, tc,
                                 count_success, run_id)

            telemetry_block_blob_service = BlockBlobService(
                account_name=SOURCE_TELEMETRY_BLOB_ACCOUNT,
                account_key=SOURCE_TELEMETRY_FILE_BLOB_KEY)

            # Join folder and file with exactly one separator between them.
            separator = '' if PROCESSED_TELEMETRY_FOLDER.endswith('/') else '/'
            target_file_path = PROCESSED_TELEMETRY_FOLDER + separator + file_path

            # The file stays in the same container; only its path changes.
            move_processed_file(telemetry_block_blob_service, container_name,
                                file_path, container_name, target_file_path,
                                tc)

            tc.track_trace(
                '{} DONE ADX INGESTION PROCESS <{}> -[{}/{}/{}], File Moved to processed folder {} , vm_uuid: {}, file path: {}'
                .format(LOG_MESSAGE_HEADER, run_id, str(count_success),
                        str(total_success), str(total_queue_success_messages),
                        target_file_path, vm_uuid, file_path))
            tc.track_event(
                APP_INSIGHT_INGEST_SUCCESS_EVENT_NAME, {
                    'MESSAGE': 'DONE ADX INGESTION PROCESS',
                    'moved_file_path': target_file_path,
                    'source_file_path': file_path
                }, {})
            tc.flush()

            print("{} IngestionSourcePath: {}".format(
                LOG_MESSAGE_HEADER, smsg.IngestionSourcePath))
            print(smsg.SucceededOn)

        for count_failure, fmsg in enumerate(failure_messages, start=1):
            container_name = get_container_name(fmsg.IngestionSourcePath)
            file_path = get_file_path(fmsg.IngestionSourcePath)
            vm_uuid = get_vm_uuid_from_filename(file_path)

            log_msg = "{} FAILED TO INGEST TO ADX <{}> -[{}/{}] , Time: {}, vm_uuid: {}, source_id:{}, container:{}, file path: {}, message: {}".format(
                LOG_MESSAGE_HEADER, run_id, str(count_failure),
                str(total_failure), fmsg.FailedOn, vm_uuid,
                fmsg.IngestionSourceId, container_name, file_path, str(fmsg))
            tc.track_trace(log_msg)
            tc.track_event(
                APP_INSIGHT_INGEST_FAILURE_EVENT_NAME, {
                    'MESSAGE': 'FAILED TO Ingest ADX',
                    'file_path': file_path,
                    'source_id': fmsg.IngestionSourceId
                }, {})
            tc.flush()

            update_COSMOS_status(COSMOS_CLIENT, file_path, fmsg.FailedOn,
                                 FAILURE_STATUS, str(fmsg), vm_uuid,
                                 fmsg.IngestionSourceId, container_name, tc,
                                 count_failure, run_id)