def ingest_to_ADX(filepath, telemetry_block_blob_service, container_name, blob_account, file_size, tc,vm_uuid,deploy_uuid,config_uuid):
    """Queue a blob for ingestion into Azure Data Explorer and log the request.

    Builds an ingest client using AAD device authentication, submits the blob
    with a JSON column mapping, writes a tracking record through
    ``save_COSMOS_log`` and reports the operation on the telemetry client.

    Args:
        filepath: Path of the blob inside the container; ``CLEAN_FILE_TOKEN``
            is appended to it when the blob URL is built.
        telemetry_block_blob_service: Not used by this function.
        container_name: Name of the blob container holding the file.
        blob_account: Storage account name used as the blob URL host prefix.
        file_size: Size handed to ``BlobDescriptor`` and reported as a metric.
        tc: Telemetry client; must provide ``context.properties``,
            ``track_event``, ``track_trace`` and ``flush``.
        vm_uuid: Forwarded to ``save_COSMOS_log``.
        deploy_uuid: Forwarded to ``save_COSMOS_log``.
        config_uuid: Forwarded to ``save_COSMOS_log``.
    """
    # A fresh id per call ties the blob, the tracking record and telemetry together.
    source_id = str(uuid.uuid4())

    connection = KustoConnectionStringBuilder.with_aad_device_authentication(DATA_INGESTION_URI)
    connection.authority_id = APP_AAD_TENANT_ID
    ingest_client = KustoIngestClient(connection)

    # Three identifier columns are lifted out of the document; the whole
    # document ("$") is also stored in the dynamic "rawdata" column.
    column_mappings = [
        JsonColumnMapping("vm_uuid", "$.vm_uuid", "string"),
        JsonColumnMapping("deploy_uuid", "$.deployment_description[0].deploy_uuid", "string"),
        JsonColumnMapping("config_uuid", "$.vm_configuration[0].config_uuid", "string"),
        JsonColumnMapping("rawdata", "$", "dynamic"),
    ]
    ingest_properties = IngestionProperties(
        database=DATABASE,
        table=DESTINATION_TABLE,
        dataFormat=DataFormat.JSON,
        ingestionMapping=column_mappings,
        reportLevel=ReportLevel.FailuresAndSuccesses,
        flushImmediately=IS_FLUSH_IMMEDIATELY,
    )
    print("Database {} Tabele {}".format(DATABASE,DESTINATION_TABLE))

    blob_url = "https://" + blob_account + ".blob.core.windows.net/" + container_name + "/" + filepath + CLEAN_FILE_TOKEN
    print(blob_url, ' ', str(file_size), source_id)

    # file_size is passed as the descriptor's size argument, source_id as its id.
    blob = BlobDescriptor(blob_url, file_size, source_id)
    ingest_client.ingest_from_blob(blob, ingestion_properties=ingest_properties)

    tc.context.properties["ingest_source_id"] = source_id

    # Hard-coded placeholder statistics: this function does not inspect the data.
    earliest = 0
    latest = 0
    record_count = 1

    doc_id = save_COSMOS_log(
        vm_uuid, deploy_uuid, config_uuid, filepath,
        earliest, latest, record_count,
        source_id, blob_account, container_name, tc,
    )

    event_properties = {
        'FILE_PATH': filepath,
        'DOC_ID': doc_id,
        "SOURCE_ID": source_id,
    }
    event_measurements = {
        'TOTOAL_RECORDS': record_count,
        'FILE_SIZE': file_size,
        'MIN_DATETIME': earliest,
        'MAX_DATETIME': latest,
    }
    tc.track_event(APP_INSIGHT_INGEST_EVENT_NAME, event_properties, event_measurements)

    summary = "{} Done queuing up ingestion with Azure Data Explorer {}, Ingest SourceID {}".format(LOG_MESSAGE_HEADER,filepath,source_id)
    print(summary)
    tc.track_trace(summary)
    tc.flush()
def test_blob_info_json_mapping(self):
    """Check that a blob info built with a JSON column mapping serializes correctly."""
    policy = ValidationPolicy(
        ValidationOptions.ValidateCsvInputConstantColumns,
        ValidationImplications.BestEffort,
    )
    json_columns = [JsonColumnMapping("ColumnName", "jsonpath", "datatype")]

    # Every optional ingestion property is populated so the serialized
    # output can be verified field by field.
    property_kwargs = {
        "database": "database",
        "table": "table",
        "dataFormat": DataFormat.json,
        "mapping": json_columns,
        "additionalTags": ["tag"],
        "ingestIfNotExists": ["ingestIfNotExistTags"],
        "ingestByTags": ["ingestByTags"],
        "dropByTags": ["dropByTags"],
        "flushImmediately": True,
        "reportLevel": ReportLevel.DoNotReport,
        "reportMethod": ReportMethod.QueueAndTable,
        "validationPolicy": policy,
    }
    ingestion_properties = IngestionProperties(**property_kwargs)

    descriptor = BlobDescriptor("somepath", 10)
    info = _IngestionBlobInfo(
        descriptor,
        ingestion_properties,
        deleteSourcesOnSuccess=True,
        authContext="authorizationContextText",
    )

    serialized = info.to_json()
    self._verify_ingestion_blob_info_result(serialized)
def get_ingestion_mapping(
        cls, data_format: str, mapping: IngestionMapping) -> List[ColumnMapping]:
    """Translate a generic ingestion mapping into Kusto column mappings.

    Args:
        cls: Unused here; presumably the owning class (the decorator, if any,
            is outside this block).
        data_format: ``"csv"`` produces ``CsvColumnMapping`` objects keyed by
            the source column index; ``"json"``, ``"singlejson"`` and
            ``"multijson"`` produce ``JsonColumnMapping`` objects whose JSON
            path is ``$.<source column name>``.
        mapping: Object whose ``columns`` iterable yields items exposing
            ``source`` and ``target`` column descriptions.

    Returns:
        One Kusto column mapping per entry in ``mapping.columns``, in order.
        An empty ``mapping.columns`` yields an empty list for any format.

    Raises:
        ValueError: If ``data_format`` is not one of the supported formats
            and there is at least one column to map. Previously this case
            failed with an ``UnboundLocalError``.
    """
    if data_format == "csv":
        # TODO: need to add __str__ to columnMapping
        def mapping_func(source_col, target_col):
            return CsvColumnMapping(
                target_col.name, target_col.data_type.value, source_col.index)
    elif data_format in ("json", "singlejson", "multijson"):
        # TODO: need to add __str__ to columnMapping
        def mapping_func(source_col, target_col):
            return JsonColumnMapping(
                target_col.name, f"$.{source_col.name}",
                cslDataType=target_col.data_type.value)
    else:
        # Fail only when a column actually has to be mapped, so an empty
        # mapping keeps returning [] exactly as before.
        def mapping_func(source_col, target_col):
            raise ValueError(
                f"Unsupported data format for ingestion mapping: {data_format!r}")

    return [mapping_func(col.source, col.target) for col in mapping.columns]
def create_deft_table_json_mappings():
    """Build the JSON column mappings for the deft table.

    Each column is read from the top-level JSON property with the same name
    (``$.<column>``). Returns the mappings as a list, in table column order.
    """
    # (column name, Kusto data type) pairs, in table column order.
    column_types = (
        ("rownumber", "int"),
        ("rowguid", "string"),
        ("xdouble", "real"),
        ("xfloat", "real"),
        ("xbool", "bool"),
        ("xint16", "int"),
        ("xint32", "int"),
        ("xint64", "long"),
        ("xuint8", "long"),
        ("xuint16", "long"),
        ("xuint32", "long"),
        ("xuint64", "long"),
        ("xdate", "datetime"),
        ("xsmalltext", "string"),
        ("xtext", "string"),
        ("xnumberAsText", "string"),
        ("xtime", "timespan"),
        ("xtextWithNulls", "string"),
        ("xdynamicWithNulls", "dynamic"),
    )
    return [
        JsonColumnMapping(
            columnName=column, jsonPath="$." + column, cslDataType=csl_type)
        for column, csl_type in column_types
    ]