Esempio n. 1
0
def ingest_to_ADX(
    filepath,
    telemetry_block_blob_service,
    container_name,
    blob_account,
    file_size,
    tc,
    vm_uuid,
    deploy_uuid,
    config_uuid,
):
    """Queue a blob for JSON ingestion into Azure Data Explorer and log it.

    A fresh source id is generated for the request, the blob URL is built
    from the storage account, container, ``filepath`` and the module-level
    ``CLEAN_FILE_TOKEN``, and the blob is handed to a ``KustoIngestClient``.
    The request is then recorded through ``save_COSMOS_log`` and reported
    on the telemetry client.

    Args:
        filepath: Path of the blob inside the container.
        telemetry_block_blob_service: Not used by this function.
        container_name: Name of the blob container.
        blob_account: Storage account name (used as the host prefix).
        file_size: Size value forwarded to ``BlobDescriptor``.
        tc: Telemetry client; receives the source id property, one event,
            one trace, and is flushed before returning.
        vm_uuid: Forwarded to ``save_COSMOS_log``.
        deploy_uuid: Forwarded to ``save_COSMOS_log``.
        config_uuid: Forwarded to ``save_COSMOS_log``.
    """
    source_id = str(uuid.uuid4())

    # Device-code AAD authentication against the ingestion endpoint.
    connection = KustoConnectionStringBuilder.with_aad_device_authentication(
        DATA_INGESTION_URI)
    connection.authority_id = APP_AAD_TENANT_ID
    client = KustoIngestClient(connection)

    # (column, JSON path, type); the whole document also lands in "rawdata".
    column_specs = (
        ("vm_uuid", "$.vm_uuid", "string"),
        ("deploy_uuid", "$.deployment_description[0].deploy_uuid", "string"),
        ("config_uuid", "$.vm_configuration[0].config_uuid", "string"),
        ("rawdata", "$", "dynamic"),
    )
    column_mappings = [JsonColumnMapping(*spec) for spec in column_specs]

    properties = IngestionProperties(
        database=DATABASE,
        table=DESTINATION_TABLE,
        dataFormat=DataFormat.JSON,
        ingestionMapping=column_mappings,
        reportLevel=ReportLevel.FailuresAndSuccesses,
        flushImmediately=IS_FLUSH_IMMEDIATELY,
    )

    print("Database {} Tabele {}".format(DATABASE, DESTINATION_TABLE))

    blob_url = "".join((
        "https://",
        blob_account,
        ".blob.core.windows.net/",
        container_name,
        "/",
        filepath,
        CLEAN_FILE_TOKEN,
    ))
    print(blob_url, ' ', str(file_size), source_id)

    # file_size is passed as the descriptor's size argument.
    descriptor = BlobDescriptor(blob_url, file_size, source_id)
    client.ingest_from_blob(descriptor, ingestion_properties=properties)
    tc.context.properties["ingest_source_id"] = source_id

    # Hard-coded values: no time range or record count is computed here.
    min_datetime = 0
    max_datetime = 0
    record_count = 1

    doc_id = save_COSMOS_log(
        vm_uuid, deploy_uuid, config_uuid, filepath,
        min_datetime, max_datetime, record_count,
        source_id, blob_account, container_name, tc)

    event_properties = {
        'FILE_PATH': filepath,
        'DOC_ID': doc_id,
        "SOURCE_ID": source_id,
    }
    event_measurements = {
        'TOTOAL_RECORDS': record_count,
        'FILE_SIZE': file_size,
        'MIN_DATETIME': min_datetime,
        'MAX_DATETIME': max_datetime,
    }
    tc.track_event(APP_INSIGHT_INGEST_EVENT_NAME, event_properties,
                   event_measurements)

    summary = "{} Done queuing up ingestion with Azure Data Explorer {}, Ingest SourceID {}".format(
        LOG_MESSAGE_HEADER, filepath, source_id)
    print(summary)
    tc.track_trace(summary)
    tc.flush()
Esempio n. 2
0
 def test_blob_info_json_mapping(self):
     """Verify the JSON form of a blob-info built with a JSON column mapping."""
     policy = ValidationPolicy(
         ValidationOptions.ValidateCsvInputConstantColumns,
         ValidationImplications.BestEffort)

     # Every ingestion option is set so the serialized output covers them all.
     ingestion_options = {
         "database": "database",
         "table": "table",
         "dataFormat": DataFormat.json,
         "mapping": [JsonColumnMapping("ColumnName", "jsonpath", "datatype")],
         "additionalTags": ["tag"],
         "ingestIfNotExists": ["ingestIfNotExistTags"],
         "ingestByTags": ["ingestByTags"],
         "dropByTags": ["dropByTags"],
         "flushImmediately": True,
         "reportLevel": ReportLevel.DoNotReport,
         "reportMethod": ReportMethod.QueueAndTable,
         "validationPolicy": policy,
     }
     ingestion_props = IngestionProperties(**ingestion_options)

     source_blob = BlobDescriptor("somepath", 10)
     info = _IngestionBlobInfo(
         source_blob,
         ingestion_props,
         deleteSourcesOnSuccess=True,
         authContext="authorizationContextText")

     serialized = info.to_json()
     self._verify_ingestion_blob_info_result(serialized)
Esempio n. 3
0
    def get_ingestion_mapping(
            cls, data_format: str,
            mapping: IngestionMapping) -> List[ColumnMapping]:
        """Convert an ingestion mapping into a list of Kusto column mappings.

        For ``"csv"`` each column becomes a ``CsvColumnMapping`` keyed by the
        source column's index. For ``"json"``, ``"singlejson"`` and
        ``"multijson"`` each column becomes a ``JsonColumnMapping`` whose
        path is ``$.<source column name>``.

        Args:
            data_format: Name of the source data format.
            mapping: Object whose ``columns`` yield items with ``source``
                and ``target`` attributes.

        Returns:
            One column mapping per entry of ``mapping.columns``, in order.
            An empty mapping yields an empty list for any format.

        Raises:
            ValueError: If there are columns to map but ``data_format`` is
                not one of the supported formats.
        """
        if data_format == "csv":
            # TODO: need to add __str__ to columnMapping
            def build_column_mapping(source_col, target_col):
                return CsvColumnMapping(
                    target_col.name, target_col.data_type.value,
                    source_col.index)
        elif data_format in ("json", "singlejson", "multijson"):
            # TODO: need to add __str__ to columnMapping
            def build_column_mapping(source_col, target_col):
                return JsonColumnMapping(
                    target_col.name,
                    f"$.{source_col.name}",
                    cslDataType=target_col.data_type.value)
        else:
            build_column_mapping = None

        kusto_ingest_mapping = []
        for col in mapping.columns:
            # Checked inside the loop so an empty mapping still returns []
            # for an unknown format, as it did before; previously a
            # non-empty one failed with an UnboundLocalError here.
            if build_column_mapping is None:
                raise ValueError(
                    f"Unsupported data format for column mapping: "
                    f"{data_format!r}")
            kusto_ingest_mapping.append(
                build_column_mapping(col.source, col.target))

        return kusto_ingest_mapping
Esempio n. 4
0
 def create_deft_table_json_mappings():
     """Build the JSON column mappings for the deft table.

     Every column is read from the JSON property of the same name
     (``$.<column>``).

     Returns:
         A new list of ``JsonColumnMapping`` objects, in the order the
         columns are listed below.
     """
     # (column name, CSL data type) pairs.
     column_types = (
         ("rownumber", "int"),
         ("rowguid", "string"),
         ("xdouble", "real"),
         ("xfloat", "real"),
         ("xbool", "bool"),
         ("xint16", "int"),
         ("xint32", "int"),
         ("xint64", "long"),
         ("xuint8", "long"),
         ("xuint16", "long"),
         ("xuint32", "long"),
         ("xuint64", "long"),
         ("xdate", "datetime"),
         ("xsmalltext", "string"),
         ("xtext", "string"),
         ("xnumberAsText", "string"),
         ("xtime", "timespan"),
         ("xtextWithNulls", "string"),
         ("xdynamicWithNulls", "dynamic"),
     )
     return [
         JsonColumnMapping(columnName=column,
                           jsonPath="$." + column,
                           cslDataType=csl_type)
         for column, csl_type in column_types
     ]