def ingest_to_ADX(filepath, filesize):
    """Queue one CSV blob for ingestion into Azure Data Explorer.

    The blob URL is assembled from the module-level source account,
    container and token settings plus ``filepath``, and handed to a Kusto
    ingest client authenticated with AAD device authentication.

    Args:
        filepath: Path of the blob inside ``SOURCE_CSV_CONTAINER``.
        filesize: Raw size of the data in bytes, passed to ``BlobDescriptor``.
    """
    kcsb_ingest = KustoConnectionStringBuilder.with_aad_device_authentication(
        DATA_INGESTION_URI)
    kcsb_ingest.authority_id = AAD_TENANT_ID
    ingestion_client = KustoIngestClient(kcsb_ingest)

    ingestion_properties = IngestionProperties(
        database=DATABASE,
        table=DESTINATION_TABLE,
        dataFormat=DataFormat.CSV,
        mappingReference=DESTINATION_TABLE_COLUMN_MAPPING,
        # The first record of the CSV is not ingested (presumably a header row).
        additionalProperties={'ignoreFirstRecord': 'true'},
        reportLevel=ReportLevel.FailuresAndSuccesses)

    # SOURCE_CSV_BLOB_TOKEN is appended verbatim to the blob URL, so it must
    # never be written to logs; only ``filepath`` is printed below.
    blob_path = ("https://" + SOURCE_CSV_BLOB_ACCOUNT + ".blob.core.windows.net/"
                 + SOURCE_CSV_CONTAINER + "/" + filepath + SOURCE_CSV_BLOB_TOKEN)
    blob_descriptor = BlobDescriptor(blob_path, filesize)

    ingestion_client.ingest_from_blob(
        blob_descriptor, ingestion_properties=ingestion_properties)
    print('Done queuing up ingestion with Azure Data Explorer ' + filepath)
def ingest_to_ADX(filepath, telemetry_block_blob_service, container_name,
                  blob_account, file_size, tc, vm_uuid, deploy_uuid,
                  config_uuid):
    """Queue one JSON blob for ingestion into Azure Data Explorer and log it.

    A fresh source id (UUID4 string) is generated for the request, attached
    to the blob descriptor, stored in the telemetry context, and recorded via
    ``save_COSMOS_log`` and a tracked telemetry event.

    Args:
        filepath: Path of the blob inside ``container_name``.
        telemetry_block_blob_service: Not used by this function; kept so
            existing callers keep working.
        container_name: Blob container holding the file.
        blob_account: Storage account name used to build the blob URL.
        file_size: Raw size of the data in bytes, passed to ``BlobDescriptor``.
        tc: Telemetry client exposing ``context.properties``, ``track_event``,
            ``track_trace`` and ``flush``.
        vm_uuid: Forwarded to ``save_COSMOS_log``.
        deploy_uuid: Forwarded to ``save_COSMOS_log``.
        config_uuid: Forwarded to ``save_COSMOS_log``.
    """
    ingest_source_id = str(uuid.uuid4())

    kcsb_ingest = KustoConnectionStringBuilder.with_aad_device_authentication(
        DATA_INGESTION_URI)
    kcsb_ingest.authority_id = APP_AAD_TENANT_ID
    ingestion_client = KustoIngestClient(kcsb_ingest)

    # Three identifiers are lifted out of the document into their own columns;
    # the whole document ("$") is additionally stored as a dynamic value.
    ing_map = [
        JsonColumnMapping("vm_uuid", "$.vm_uuid", "string"),
        JsonColumnMapping("deploy_uuid",
                          "$.deployment_description[0].deploy_uuid", "string"),
        JsonColumnMapping("config_uuid",
                          "$.vm_configuration[0].config_uuid", "string"),
        JsonColumnMapping("rawdata", "$", "dynamic"),
    ]
    ingestion_properties = IngestionProperties(
        database=DATABASE,
        table=DESTINATION_TABLE,
        dataFormat=DataFormat.JSON,
        ingestionMapping=ing_map,
        reportLevel=ReportLevel.FailuresAndSuccesses,
        flushImmediately=IS_FLUSH_IMMEDIATELY)
    print("Database {} Tabele {}".format(DATABASE, DESTINATION_TABLE))

    blob_url = ("https://" + blob_account + ".blob.core.windows.net/"
                + container_name + "/" + filepath)
    # CLEAN_FILE_TOKEN is appended to the URL handed to the service
    # (presumably a SAS token - confirm). Log only the token-free URL so the
    # credential does not end up in stdout or collected logs.
    blob_path = blob_url + CLEAN_FILE_TOKEN
    print(blob_url, ' ', str(file_size), ingest_source_id)

    blob_descriptor = BlobDescriptor(blob_path, file_size, ingest_source_id)
    ingestion_client.ingest_from_blob(
        blob_descriptor, ingestion_properties=ingestion_properties)

    tc.context.properties["ingest_source_id"] = ingest_source_id

    # This variant does not inspect the file contents, so the time range and
    # record count are logged as fixed placeholder values.
    min_datatime = 0
    max_datatime = 0
    total_records = 1

    doc_id = save_COSMOS_log(vm_uuid, deploy_uuid, config_uuid, filepath,
                             min_datatime, max_datatime, total_records,
                             ingest_source_id, blob_account, container_name,
                             tc)

    tc.track_event(
        APP_INSIGHT_INGEST_EVENT_NAME,
        {'FILE_PATH': filepath, 'DOC_ID': doc_id,
         "SOURCE_ID": ingest_source_id},
        {'TOTOAL_RECORDS': total_records, 'FILE_SIZE': file_size,
         'MIN_DATETIME': min_datatime, 'MAX_DATETIME': max_datatime})

    log_msg = ("{} Done queuing up ingestion with Azure Data Explorer {}, "
               "Ingest SourceID {}").format(
                   LOG_MESSAGE_HEADER, filepath, ingest_source_id)
    print(log_msg)
    tc.track_trace(log_msg)
    tc.flush()
def ingest_to_ADX(filepath, telemetry_block_blob_service, container_name,
                  blob_account, tc):
    """Queue one CSV blob for ingestion into Azure Data Explorer and log it.

    The identifiers, file size, time range and record count are obtained from
    ``get_uuids_from_csv``; the three identifiers are combined into a single
    drop-by tag for the ingested data. The request is then recorded through
    ``save_COSMOS_log`` and the telemetry client.

    Args:
        filepath: Path of the blob to ingest.
        telemetry_block_blob_service: Blob service handed to
            ``get_uuids_from_csv``.
        container_name: Container handed to ``get_uuids_from_csv`` and
            recorded by ``save_COSMOS_log``.
        blob_account: Storage account name recorded by ``save_COSMOS_log``.
        tc: Telemetry client exposing ``context.properties``, ``track_event``,
            ``track_trace`` and ``flush``.
    """
    source_id = str(uuid.uuid4())

    connection = KustoConnectionStringBuilder.with_aad_device_authentication(
        DATA_INGESTION_URI)
    connection.authority_id = APP_AAD_TENANT_ID

    (vm_uuid, config_uuid, deploy_uuid, file_size,
     min_datatime, max_datatime, total_records) = get_uuids_from_csv(
         telemetry_block_blob_service, container_name, filepath)
    drop_tag = '_'.join((vm_uuid, config_uuid, deploy_uuid))

    client = KustoIngestClient(connection)
    extra_properties = {
        'ignoreFirstRecord': 'true',
        'reportMethod': 'QueueAndTable',
    }
    properties = IngestionProperties(
        database=DATABASE,
        table=DESTINATION_TABLE,
        dataFormat=DataFormat.CSV,
        mappingReference=DESTINATION_TABLE_COLUMN_MAPPING,
        additionalProperties=extra_properties,
        reportLevel=ReportLevel.FailuresAndSuccesses,
        dropByTags=[drop_tag],
        flushImmediately=IS_FLUSH_IMMEDIATELY)

    # NOTE(review): the URL is built from the SOURCE_OSMETRICS_* constants,
    # while ``container_name`` / ``blob_account`` are what gets scanned and
    # logged - confirm both always refer to the same location.
    blob_url = "".join([
        "https://", SOURCE_OSMETRICS_BLOB_ACCOUNT, ".blob.core.windows.net/",
        SOURCE_OSMETRICS_CONTAINER, "/", filepath,
        SOURCE_OSMETRICS_FILE_TOKEN,
    ])
    descriptor = BlobDescriptor(blob_url, file_size, source_id)
    client.ingest_from_blob(descriptor, ingestion_properties=properties)

    tc.context.properties["ingest_source_id"] = source_id
    doc_id = save_COSMOS_log(vm_uuid, deploy_uuid, config_uuid, filepath,
                             min_datatime, max_datatime, total_records,
                             source_id, blob_account, container_name, tc)

    event_properties = {
        'FILE_PATH': filepath,
        'DOC_ID': doc_id,
        "SOURCE_ID": source_id,
    }
    event_measurements = {
        'TOTOAL_RECORDS': total_records,
        'FILE_SIZE': file_size,
        'MIN_DATETIME': min_datatime,
        'MAX_DATETIME': max_datatime,
    }
    tc.track_event(APP_INSIGHT_INGEST_EVENT_NAME, event_properties,
                   event_measurements)

    message = "{} Done queuing up ingestion with Azure Data Explorer {}, Ingest SourceID {}".format(
        LOG_MESSAGE_HEADER, filepath, source_id)
    print(message)
    tc.track_trace(message)
    tc.flush()
# in case status update for success are also required # reportLevel=ReportLevel.FailuresAndSuccesses, # in case a mapping is required # ingestionMappingReference="{json_mapping_that_already_exists_on_table}" # ingestionMappingType=IngestionMappingType.Json ) # ingest from file file_descriptor = FileDescriptor("{filename}.csv", 3333) # 3333 is the raw size of the data in bytes. client.ingest_from_file(file_descriptor, ingestion_properties=ingestion_props) client.ingest_from_file("{filename}.csv", ingestion_properties=ingestion_props) # ingest from blob blob_descriptor = BlobDescriptor("https://{path_to_blob}.csv.gz?sas", 10) # 10 is the raw size of the data in bytes. client.ingest_from_blob(blob_descriptor, ingestion_properties=ingestion_props) # ingest from dataframe import pandas fields = ["id", "name", "value"] rows = [[1, "abc", 15.3], [2, "cde", 99.9]] df = pandas.DataFrame(data=rows, columns=fields) client.ingest_from_dataframe(df, ingestion_properties=ingestion_props) # ingest a whole folder. import os path = "folder/path"