Example #1
import datetime
import os

import pandas
from azure.kusto.data import KustoConnectionStringBuilder
from azure.kusto.data.data_format import DataFormat
from azure.kusto.ingest import IngestionProperties, QueuedIngestClient, ReportLevel


def write_to_db(binary_size_data, args):
    # connect to database
    cluster = "https://ingest-onnxruntimedashboarddb.southcentralus.kusto.windows.net"
    kcsb = KustoConnectionStringBuilder.with_az_cli_authentication(cluster)
    # The authentication method will be taken from the chosen KustoConnectionStringBuilder.
    client = QueuedIngestClient(kcsb)
    fields = ["build_time", "build_id", "build_project", "commit_id", "os", "arch", "build_config", "size", "Branch"]
    now_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    branch_name = os.environ.get("BUILD_SOURCEBRANCHNAME", "main")
    rows = []
    for row in binary_size_data:
        rows.append(
            [
                now_str,
                args.build_id,
                args.build_project,
                args.commit_hash,
                row["os"],
                row["arch"],
                row["build_config"],
                row["size"],
                branch_name.lower(),
            ]
        )
    ingestion_props = IngestionProperties(
        database="powerbi",
        table="binary_size",
        data_format=DataFormat.CSV,
        report_level=ReportLevel.FailuresAndSuccesses,
    )
    df = pandas.DataFrame(data=rows, columns=fields)
    client.ingest_from_dataframe(df, ingestion_properties=ingestion_props)
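
# A minimal usage sketch (not in the original source): the `args` namespace and
# the `binary_size_data` rows below are hypothetical, shaped only to match what
# write_to_db reads from them.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--build_id", default="12345")
    parser.add_argument("--build_project", default="onnxruntime")
    parser.add_argument("--commit_hash", default="abcdef0")
    args = parser.parse_args()

    binary_size_data = [
        {"os": "linux", "arch": "x64", "build_config": "minimal", "size": 1048576},
    ]
    write_to_db(binary_size_data, args)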
Example #2
# (Uses the same imports as Example #1.)
def write_to_db(coverage_data, args):
    # connect to database
    cluster = "https://ingest-onnxruntimedashboarddb.southcentralus.kusto.windows.net"
    kcsb = KustoConnectionStringBuilder.with_az_cli_authentication(cluster)
    # The authentication method will be taken from the chosen KustoConnectionStringBuilder.
    client = QueuedIngestClient(kcsb)
    fields = [
        "UploadTime", "CommitId", "Coverage", "LinesCovered", "TotalLines",
        "OS", "Arch", "BuildConfig", "ReportURL", "Branch"
    ]
    now_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    rows = [[
        now_str, args.commit_hash, coverage_data['coverage'],
        coverage_data['lines_covered'], coverage_data['lines_valid'],
        args.os.lower(),
        args.arch.lower(),
        args.build_config.lower(),
        args.report_url.lower(),
        args.branch.lower()
    ]]
    ingestion_props = IngestionProperties(
        database="powerbi",
        table="test_coverage",
        data_format=DataFormat.CSV,
        report_level=ReportLevel.FailuresAndSuccesses)
    df = pandas.DataFrame(data=rows, columns=fields)
    client.ingest_from_dataframe(df, ingestion_properties=ingestion_props)
Example #3
    # Excerpted from a test class: the mock_* parameters are injected by
    # unittest.mock.patch decorators (omitted here) that stub out the Azure
    # queue and blob clients.
    def test_simple_ingest_from_dataframe(self, mock_pid, mock_time, mock_uuid, mock_put_message_in_queue, mock_upload_blob_from_stream):
        responses.add_callback(
            responses.POST, "https://ingest-somecluster.kusto.windows.net/v1/rest/mgmt", callback=request_callback, content_type="application/json"
        )

        ingest_client = QueuedIngestClient("https://ingest-somecluster.kusto.windows.net")
        ingestion_properties = IngestionProperties(database="database", table="table", data_format=DataFormat.CSV)

        from pandas import DataFrame

        fields = ["id", "name", "value"]
        rows = [[1, "abc", 15.3], [2, "cde", 99.9]]
        df = DataFrame(data=rows, columns=fields)

        ingest_client.ingest_from_dataframe(df, ingestion_properties=ingestion_properties)

        # mock_put_message_in_queue
        assert mock_put_message_in_queue.call_count == 1

        put_message_in_queue_mock_kwargs = mock_put_message_in_queue.call_args_list[0][1]

        queued_message_json = json.loads(put_message_in_queue_mock_kwargs["content"])
        expected_url = "https://storageaccount.blob.core.windows.net/tempstorage/database__table__1111-111111-111111-1111__df_{0}_100_1111-111111-111111-1111.csv.gz?".format(
            id(df)
        )
        # mock_upload_blob_from_stream
        # not checking the query string because it can change order, just checking it's there
        assert queued_message_json["BlobPath"].startswith(expected_url) is True
        assert len(queued_message_json["BlobPath"]) > len(expected_url)
        assert queued_message_json["DatabaseName"] == "database"
        assert queued_message_json["IgnoreSizeLimit"] is False
        assert queued_message_json["AdditionalProperties"]["format"] == "csv"
        assert queued_message_json["FlushImmediately"] is False
        assert queued_message_json["TableName"] == "table"
        assert queued_message_json["RawDataSize"] > 0
        assert queued_message_json["RetainBlobOnSuccess"] is True

        upload_blob_kwargs = mock_upload_blob_from_stream.call_args_list[0][1]

        assert type(upload_blob_kwargs["data"]) == io.BufferedReader
Example #4
import json
import sys
import threading
import traceback
from datetime import datetime, timedelta

from pandas import DataFrame
from azure.kusto.data import KustoClient, KustoConnectionStringBuilder
from azure.kusto.ingest import IngestionProperties, QueuedIngestClient

# IOutput, KqlOutput, Dictionary, and JsonConvert are .NET types supplied by
# the hosting Real-Time KQL process via pythonnet.


class PythonAdxOutput(IOutput):
    """
    An output component that ingests events to Azure Data Explorer (ADX) using queued ingestion.
    
    ...

    Attributes
    ----------
    cluster : str
        Azure Data Explorer (ADX) cluster address. eg, 'CDOC.kusto.windows.net'
    database : str
        Azure Data Explorer (ADX) database name. eg, 'TestDb'
    table : str
        Azure Data Explorer (ADX) table name. eg, 'OutputTable'
    clientId : str
        Azure Data Explorer (ADX) client Id that has permissions to access ADX.
    clientSecret : str
        Azure Data Explorer (ADX) access key. Used along with client Id.
    authority : str
        Azure Data Explorer (ADX) authority. Optional. When not specified, 'microsoft.com' is used.
    resetTable : bool
        Default is False. If True, the existing data in the destination table is dropped before new data is logged.
    """
    __namespace__ = "KqlPython"

    def __init__(self, cluster, database, table, clientId, clientSecret, authority="microsoft.com", resetTable=False):
        """
        Parameters
        ----------
        cluster : str
            Azure Data Explorer (ADX) cluster address. eg, 'CDOC.kusto.windows.net'
        database : str
            Azure Data Explorer (ADX) database name. eg, 'TestDb'
        table : str
            Azure Data Explorer (ADX) table name. eg, 'OutputTable'
        clientId : str
            Azure Data Explorer (ADX) client Id that has permissions to access ADX.
        clientSecret : str
            Azure Data Explorer (ADX) access key. Used along with client Id.
        authority : str
            Azure Data Explorer (ADX) authority. Optional. When not specified, 'microsoft.com' is used.
        resetTable : bool
            Default is False. If True, the existing data in the destination table is dropped before new data is logged.
        """
        self.running = True
        self.batchSize = 10000
        self.flushDuration = timedelta(milliseconds=1000)
        self.lastUploadTime = datetime.utcnow()
        self.initTable = False
        self.nextBatch = list()
        self.currentBatch = None
        self.lock = threading.Lock()

        self.resetTable = resetTable
        self.database = database
        self.table = table
        self.kcsbData = KustoConnectionStringBuilder.with_aad_application_key_authentication(f"https://{cluster}:443/", clientId, clientSecret, authority)
        self.kcsbIngest = KustoConnectionStringBuilder.with_aad_application_key_authentication(f"https://ingest-{cluster}:443/", clientId, clientSecret, authority)
        self.dataClient = KustoClient(self.kcsbData)
        self.ingestClient = QueuedIngestClient(self.kcsbIngest)
        self.ingestionProps = IngestionProperties(database=database, table=table)

    def KqlOutputAction(self, kqlOutput: KqlOutput):
        """Outputs events that have been processed by a KQL query"""
        self.OutputAction(kqlOutput.Output)

    def OutputAction(self, dictOutput: Dictionary):
        """Outputs events either to console or to custom function"""
        try:
            if self.running:
                # Convert C# Dictionary to Python dict
                txt = JsonConvert.SerializeObject(dictOutput)
                newEvent = json.loads(txt)
                
                # Initialize table
                if not self.initTable:
                    self.CreateOrResetTable(newEvent)
                    self.initTable = True

                # Check if it's time to upload a batch
                if (len(self.nextBatch) >= self.batchSize) or (datetime.utcnow() > self.lastUploadTime + self.flushDuration):
                    self.UploadBatch()

                self.nextBatch.append(newEvent)
        except Exception:
            self.running = False
            print(sys.exc_info())
            traceback.print_exc()

    def OutputError(self, error):
        """Outputs errors to console"""
        self.running = False 
        print(error)
    
    def OutputCompleted(self):
        """Signals the end of the input event stream"""
        if self.running:
            self.UploadBatch()
        self.running = False

    def Stop(self):
        """Signals end of program"""
        print('\nCompleted!')
        print('\nThank you for using Real-time KQL!')

    def UploadBatch(self):
        """Ingests batch of events to Kusto using queued ingestion"""
        self.lock.acquire()
        try:
            if self.currentBatch is not None:
                raise Exception('Upload must not be called before the batch currently being uploaded is completed')

            self.currentBatch = self.nextBatch
            self.nextBatch = list()

            if len(self.currentBatch) > 0:
                df = DataFrame(self.currentBatch)
                self.ingestClient.ingest_from_dataframe(df, ingestion_properties=self.ingestionProps)
                print(f"{len(self.currentBatch)},", end = " ")

            self.currentBatch = None
            self.lastUploadTime = datetime.utcnow()
        except Exception:
            self.running = False
            print(sys.exc_info())
            traceback.print_exc()
        finally:
            self.lock.release()
    
    def CreateOrResetTable(self, data):
        """Creates or resets ADX table"""
        if self.resetTable:
            # Dropping table
            self.dataClient.execute(self.database, f".drop table {self.table} ifexists")

        # Create-merge table
        tableMapping = "("
        for item in data:
            tableMapping += f"{item}: {self.GetColumnType(data[item])}, "
        tableMapping = tableMapping[:-2] + ")"
        createMergeTableCommand = f".create-merge table {self.table} " + tableMapping
        self.dataClient.execute(self.database, createMergeTableCommand)

    def GetColumnType(self, item):
        """Returns Kusto data type string equivalent of python object"""
        if isinstance(item, str):
            return "string"
        elif isinstance(item, bool):
            return "bool"
        elif isinstance(item, datetime):
            return "datetime"
        elif isinstance(item, timedelta):
            return "timespan"
        elif isinstance(item, (int, bytes, bytearray)):
            return "int"
        elif isinstance(item, float):
            return "real"
        else:
            return "dynamic"
Example #5
# ingest from blob (assumes `client` is a QueuedIngestClient and
# `ingestion_props` an IngestionProperties instance, as in the examples above)
blob_descriptor = BlobDescriptor(
    "https://{path_to_blob}.csv.gz?sp=rl&st=2020-05-20T13:38:37Z&se=2020-05-21T13:38:37Z&sv=2019-10-10&sr=c&sig=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
    10,
)  # 10 is the raw size of the data in bytes.
client.ingest_from_blob(blob_descriptor, ingestion_properties=ingestion_props)

# ingest from dataframe
import pandas

fields = ["id", "name", "value"]
rows = [[1, "abc", 15.3], [2, "cde", 99.9]]

df = pandas.DataFrame(data=rows, columns=fields)

client.ingest_from_dataframe(df, ingestion_properties=ingestion_props)

# ingest a whole folder.
import os

path = "folder/path"
for file_name in os.listdir(path):
    # os.listdir returns bare file names, so join them with the folder path
    client.ingest_from_file(os.path.join(path, file_name), ingestion_properties=ingestion_props)
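
# If the raw (uncompressed) size of a file is known up front, a FileDescriptor
# can be passed instead of a bare path so the service can size batches; a
# sketch, assuming the same `client` and `ingestion_props` as above:
from azure.kusto.ingest import FileDescriptor

file_descriptor = FileDescriptor("data.csv.gz", 3333)  # 3333 is the raw size of the data in bytes.
client.ingest_from_file(file_descriptor, ingestion_properties=ingestion_props)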

##################################################################
##                        INGESTION STATUS                      ##
##################################################################

# if status updates are required, something like this can be done
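# A sketch using KustoIngestStatusQueues (azure.kusto.ingest.status): when
# report_level=ReportLevel.FailuresAndSuccesses is set on IngestionProperties,
# the service posts per-blob outcomes to success/failure queues that can be polled.
import time

from azure.kusto.ingest.status import KustoIngestStatusQueues

status_queues = KustoIngestStatusQueues(client)
while status_queues.success.is_empty() and status_queues.failure.is_empty():
    time.sleep(5)  # back off between polls

for message in status_queues.success.pop(10):  # read up to 10 success notifications
    print("SUCCESS:", message)
for message in status_queues.failure.pop(10):  # read up to 10 failure notifications
    print("FAILURE:", message)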