Example 1
def write_to_db(binary_size_data, args):
    # connect to database
    cluster = "https://ingest-onnxruntimedashboarddb.southcentralus.kusto.windows.net"
    kcsb = KustoConnectionStringBuilder.with_az_cli_authentication(cluster)
    # The authentication method will be taken from the chosen KustoConnectionStringBuilder.
    client = QueuedIngestClient(kcsb)
    fields = ["build_time", "build_id", "build_project", "commit_id", "os", "arch", "build_config", "size", "Branch"]
    now_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    branch_name = os.environ.get("BUILD_SOURCEBRANCHNAME", "main")
    rows = []
    for row in binary_size_data:
        rows.append(
            [
                now_str,
                args.build_id,
                args.build_project,
                args.commit_hash,
                row["os"],
                row["arch"],
                row["build_config"],
                row["size"],
                branch_name.lower(),
            ]
        )
    ingestion_props = IngestionProperties(
        database="powerbi",
        table="binary_size",
        data_format=DataFormat.CSV,
        report_level=ReportLevel.FailuresAndSuccesses,
    )
    df = pandas.DataFrame(data=rows, columns=fields)
    client.ingest_from_dataframe(df, ingestion_properties=ingestion_props)
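For context, snippets like Examples 1 and 2 assume imports along these lines. This is a hedged sketch: exact module paths vary across azure-kusto package versions (for instance, QueuedIngestClient was named KustoIngestClient before azure-kusto-ingest 3.x).

import datetime
import os

import pandas
from azure.kusto.data import KustoConnectionStringBuilder
from azure.kusto.data.data_format import DataFormat
from azure.kusto.ingest import IngestionProperties, QueuedIngestClient, ReportLevel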
Example 2
def write_to_db(coverage_data, args):
    # connect to database
    cluster = "https://ingest-onnxruntimedashboarddb.southcentralus.kusto.windows.net"
    kcsb = KustoConnectionStringBuilder.with_az_cli_authentication(cluster)
    # The authentication method will be taken from the chosen KustoConnectionStringBuilder.
    client = QueuedIngestClient(kcsb)
    fields = [
        "UploadTime", "CommitId", "Coverage", "LinesCovered", "TotalLines",
        "OS", "Arch", "BuildConfig", "ReportURL", "Branch"
    ]
    now_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    rows = [[
        now_str, args.commit_hash, coverage_data['coverage'],
        coverage_data['lines_covered'], coverage_data['lines_valid'],
        args.os.lower(),
        args.arch.lower(),
        args.build_config.lower(),
        args.report_url.lower(),
        args.branch.lower()
    ]]
    ingestion_props = IngestionProperties(
        database="powerbi",
        table="test_coverage",
        data_format=DataFormat.CSV,
        report_level=ReportLevel.FailuresAndSuccesses)
    df = pandas.DataFrame(data=rows, columns=fields)
    client.ingest_from_dataframe(df, ingestion_properties=ingestion_props)
Example 3
        def ingest_from_blob(cls,
                             ingest_client: QueuedIngestClient,
                             database_name: str,
                             table_name: str,
                             blob_url: str,
                             data_format: DataFormat,
                             mapping_name: str = None) -> None:
            """
            Ingest Data from a Blob.
            :param ingest_client: Client to ingest data
            :param database_name: DB name
            :param table_name: Table name
            :param blob_url: Blob Uri
            :param data_format: Given data format
            :param mapping_name: Desired mapping name
            """
            ingestion_properties = cls.create_ingestion_properties(
                database_name, table_name, data_format, mapping_name)

            # Tip 1: For optimal ingestion batching and performance, specify the uncompressed data size in the file descriptor instead of the default of 0 below.
            # Otherwise, the service will determine the file size itself, which requires an additional s2s call and may be inaccurate for compressed files.
            # Tip 2: To correlate between ingestion operations in your applications and Kusto, set the source ID and log it somewhere
            blob_descriptor = BlobDescriptor(blob_url,
                                             size=0,
                                             source_id=str(uuid.uuid4()))
            ingest_client.ingest_from_blob(
                blob_descriptor, ingestion_properties=ingestion_properties)
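A hedged usage sketch for the helper above; the enclosing class name (SampleClass), cluster URI, and blob URL are all illustrative:

from azure.kusto.data import KustoConnectionStringBuilder
from azure.kusto.data.data_format import DataFormat
from azure.kusto.ingest import QueuedIngestClient

kcsb = KustoConnectionStringBuilder.with_az_cli_authentication("https://ingest-mycluster.kusto.windows.net")
ingest_client = QueuedIngestClient(kcsb)
# SampleClass stands in for whatever class defines ingest_from_blob above.
SampleClass.ingest_from_blob(ingest_client, "MyDatabase", "MyTable",
                             "https://myaccount.blob.core.windows.net/container/data.csv",
                             DataFormat.CSV)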
Example 4
    def test_pop_unbalanced_queues(self):
        client = QueuedIngestClient("some-cluster")

        fake_receive = fake_receive_factory(
            lambda queue_name, messages_per_page=1:
            [mock_message(success=False) for _ in range(0, messages_per_page)]
            if "1" in queue_name else [])
        with mock.patch.object(
                client._resource_manager,
                "get_successful_ingestions_queues"), mock.patch.object(
                    client._resource_manager, "get_failed_ingestions_queues"
                ) as mocked_get_failed_qs, mock.patch.object(
                    QueueClient,
                    "receive_messages",
                    autospec=True,
                    side_effect=fake_receive,
                ) as q_receive_mock, mock.patch.object(QueueClient,
                                                       "delete_message",
                                                       return_value=None):

            fake_failed_queue1 = _ResourceUri(
                "mocked_storage_account_f1",
                OBJECT_TYPE,
                "queue",
                "mocked_qf_1_name",
                ENDPOINT_SUFFIX,
            )
            fake_failed_queue2 = _ResourceUri(
                "mocked_storage_account_f2",
                OBJECT_TYPE,
                "queue",
                "mocked_qf_2_name",
                ENDPOINT_SUFFIX,
            )

            mocked_get_failed_qs.return_value = [
                fake_failed_queue1, fake_failed_queue2
            ]

            qs = KustoIngestStatusQueues(client)

            get_failure_actual = qs.failure.pop(6)

            assert len(get_failure_actual) == 6

            for m in get_failure_actual:
                assert isinstance(m, FailureMessage)

            assert q_receive_mock.call_count == 3

            actual = {}

            for call_args in q_receive_mock.call_args_list:
                actual[call_args[0][0].queue_name] = actual.get(
                    call_args[0][0].queue_name,
                    0) + call_args[1]["messages_per_page"]

            assert actual[fake_failed_queue2.object_name] + actual[
                fake_failed_queue1.object_name] == (4 + 4 + 6)
Example 5
    def test_sanity_ingest_from_file(self, mock_uuid, mock_put_message_in_queue, mock_upload_blob_from_stream, mock_aad):
        responses.add_callback(
            responses.POST, "https://ingest-somecluster.kusto.windows.net/v1/rest/mgmt", callback=request_callback, content_type="application/json"
        )

        ingest_client = QueuedIngestClient("https://ingest-somecluster.kusto.windows.net")
        ingestion_properties = IngestionProperties(database="database", table="table", data_format=DataFormat.CSV)

        # ensure the test works regardless of the directory it is executed from
        current_dir = os.getcwd()
        path_parts = ["azure-kusto-ingest", "tests", "input", "dataset.csv"]
        missing_path_parts = []
        for path_part in path_parts:
            if path_part not in current_dir:
                missing_path_parts.append(path_part)

        file_path = os.path.join(current_dir, *missing_path_parts)

        ingest_client.ingest_from_file(file_path, ingestion_properties=ingestion_properties)

        # mock_put_message_in_queue
        assert mock_put_message_in_queue.call_count == 1

        put_message_in_queue_mock_kwargs = mock_put_message_in_queue.call_args_list[0][1]

        queued_message_json = json.loads(put_message_in_queue_mock_kwargs["content"])
        expected_url = "https://storageaccount.blob.core.windows.net/tempstorage/database__table__1111-111111-111111-1111__dataset.csv.gz?"
        # mock_upload_blob_from_stream
        # not checking the query string because it can change order, just checking it's there
        assert queued_message_json["BlobPath"].startswith(expected_url) is True
        assert len(queued_message_json["BlobPath"]) > len(expected_url)
        assert queued_message_json["DatabaseName"] == "database"
        assert queued_message_json["IgnoreSizeLimit"] is False
        assert queued_message_json["AdditionalProperties"]["format"] == "csv"
        assert queued_message_json["FlushImmediately"] is False
        assert queued_message_json["TableName"] == "table"
        assert queued_message_json["RawDataSize"] > 0
        assert queued_message_json["RetainBlobOnSuccess"] is True

        upload_blob_kwargs = mock_upload_blob_from_stream.call_args_list[0][1]

        assert type(upload_blob_kwargs["data"]) == io.BytesIO
Example 6
    def test_simple_ingest_from_dataframe(self, mock_pid, mock_time, mock_uuid, mock_put_message_in_queue, mock_upload_blob_from_stream):
        responses.add_callback(
            responses.POST, "https://ingest-somecluster.kusto.windows.net/v1/rest/mgmt", callback=request_callback, content_type="application/json"
        )

        ingest_client = QueuedIngestClient("https://ingest-somecluster.kusto.windows.net")
        ingestion_properties = IngestionProperties(database="database", table="table", data_format=DataFormat.CSV)

        from pandas import DataFrame

        fields = ["id", "name", "value"]
        rows = [[1, "abc", 15.3], [2, "cde", 99.9]]
        df = DataFrame(data=rows, columns=fields)

        ingest_client.ingest_from_dataframe(df, ingestion_properties=ingestion_properties)

        # mock_put_message_in_queue
        assert mock_put_message_in_queue.call_count == 1

        put_message_in_queue_mock_kwargs = mock_put_message_in_queue.call_args_list[0][1]

        queued_message_json = json.loads(put_message_in_queue_mock_kwargs["content"])
        expected_url = "https://storageaccount.blob.core.windows.net/tempstorage/database__table__1111-111111-111111-1111__df_{0}_100_1111-111111-111111-1111.csv.gz?".format(
            id(df)
        )
        # mock_upload_blob_from_stream
        # not checking the query string because it can change order, just checking it's there
        assert queued_message_json["BlobPath"].startswith(expected_url) is True
        assert len(queued_message_json["BlobPath"]) > len(expected_url)
        assert queued_message_json["DatabaseName"] == "database"
        assert queued_message_json["IgnoreSizeLimit"] is False
        assert queued_message_json["AdditionalProperties"]["format"] == "csv"
        assert queued_message_json["FlushImmediately"] is False
        assert queued_message_json["TableName"] == "table"
        assert queued_message_json["RawDataSize"] > 0
        assert queued_message_json["RetainBlobOnSuccess"] is True

        upload_blob_kwargs = mock_upload_blob_from_stream.call_args_list[0][1]

        assert type(upload_blob_kwargs["data"]) == io.BufferedReader
Example 7
    def test_ingest_from_file_wrong_endpoint(self):
        responses.add_callback(
            responses.POST, "https://somecluster.kusto.windows.net/v1/rest/mgmt", callback=request_error_callback, content_type="application/json"
        )

        ingest_client = QueuedIngestClient("https://somecluster.kusto.windows.net")
        ingestion_properties = IngestionProperties(database="database", table="table", data_format=DataFormat.CSV)

        current_dir = os.getcwd()
        path_parts = ["azure-kusto-ingest", "tests", "input", "dataset.csv"]
        missing_path_parts = []
        for path_part in path_parts:
            if path_part not in current_dir:
                missing_path_parts.append(path_part)

        file_path = os.path.join(current_dir, *missing_path_parts)

        with self.assertRaises(KustoInvalidEndpointError) as ex:
            ingest_client.ingest_from_file(file_path, ingestion_properties=ingestion_properties)
        self.assertEqual(
            ex.exception.args[0],
            "You are using 'DataManagement' client type, but the provided endpoint is of ServiceType 'Engine'. Initialize the client with the appropriate endpoint URI: 'https://ingest-somecluster.kusto.windows.net'",
            "Expected exception was not raised",
        )
Example 8
    def __init__(self, cluster, database, table, clientId, clientSecret, authority="microsoft.com", resetTable=False):
        """
        Parameters
        ----------
        cluster : str
            Azure Data Explorer (ADX) cluster address. eg, 'CDOC.kusto.windows.net'
        database : str
            Azure Data Explorer (ADX) database name. eg, 'TestDb'
        table : str
            Azure Data Explorer (ADX) table name. eg, 'OutputTable'
        clientId : str
            Azure Data Explorer (ADX) client Id that has permissions to access ADX.
        clientSecret : str
            Azure Data Explorer (ADX) access key. Used along with client Id.
        authority : str
            Azure Data Explorer (ADX) authority. Optional. When not specified, 'microsoft.com' is used.
        resetTable : bool
            Default is False. If True, the existing data in the destination table is dropped before new data is logged.
        """
        self.running = True
        self.batchSize = 10000
        self.flushDuration = timedelta(milliseconds=1000)
        self.lastUploadTime = datetime.utcnow()
        self.initTable = False
        self.nextBatch = list()
        self.currentBatch = None
        self.lock = threading.Lock()

        self.resetTable = resetTable
        self.database = database
        self.table = table
        self.kcsbData = KustoConnectionStringBuilder.with_aad_application_key_authentication(f"https://{cluster}:443/", clientId, clientSecret, authority)
        self.kcsbIngest = KustoConnectionStringBuilder.with_aad_application_key_authentication(f"https://ingest-{cluster}:443/", clientId, clientSecret, authority)
        self.dataClient = KustoClient(self.kcsbData)
        self.ingestClient = QueuedIngestClient(self.kcsbIngest)
        self.ingestionProps = IngestionProperties(database=database, table=table)
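Note the endpoint split here: queries go to the engine endpoint (https://{cluster}) while queued ingestion goes to the data-management endpoint (https://ingest-{cluster}). This __init__ belongs to the PythonAdxOutput class shown in full in Example 13; a hedged construction sketch with illustrative values:

output = PythonAdxOutput(
    "mycluster.kusto.windows.net",
    "TestDb",
    "OutputTable",
    clientId="00000000-0000-0000-0000-000000000000",  # illustrative AAD application id
    clientSecret="<secret>",  # illustrative secret
    resetTable=True,
)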
Example 9
    def setup_class(cls):
        # The DM (ingest) connection string can be composed from the engine connection string
        cls.engine_cs = os.environ.get("ENGINE_CONNECTION_STRING")
        cls.dm_cs = os.environ.get(
            "DM_CONNECTION_STRING") or cls.engine_cs.replace(
                "//", "//ingest-")
        cls.app_id = os.environ.get("APP_ID")
        cls.app_key = os.environ.get("APP_KEY")
        cls.auth_id = os.environ.get("AUTH_ID")
        cls.test_db = os.environ.get("TEST_DATABASE")

        if not all([
                cls.engine_cs, cls.dm_cs, cls.app_id, cls.app_key, cls.auth_id,
                cls.test_db
        ]):
            raise unittest.SkipTest("E2E environment is missing")

        # Init clients
        python_version = "_".join([str(v) for v in sys.version_info[:3]])
        cls.test_table = "python_test_{0}_{1}_{2}".format(
            python_version, str(int(time.time())), random.randint(1, 100000))
        cls.client = KustoClient(cls.engine_kcsb_from_env())
        cls.ingest_client = QueuedIngestClient(cls.dm_kcsb_from_env())
        cls.streaming_ingest_client = KustoStreamingIngestClient(
            cls.engine_kcsb_from_env())

        cls.input_folder_path = cls.get_file_path()

        cls.csv_file_path = os.path.join(cls.input_folder_path, "dataset.csv")
        cls.tsv_file_path = os.path.join(cls.input_folder_path, "dataset.tsv")
        cls.zipped_csv_file_path = os.path.join(cls.input_folder_path,
                                                "dataset.csv.gz")
        cls.json_file_path = os.path.join(cls.input_folder_path,
                                          "dataset.json")
        cls.zipped_json_file_path = os.path.join(cls.input_folder_path,
                                                 "dataset.jsonz.gz")

        cls.current_count = 0

        cls.client.execute(
            cls.test_db,
            ".create table {0} (rownumber: int, rowguid: string, xdouble: real, xfloat: real, xbool: bool, xint16: int, xint32: int, xint64: long, xuint8: long, xuint16: long, xuint32: long, xuint64: long, xdate: datetime, xsmalltext: string, xtext: string, xnumberAsText: string, xtime: timespan, xtextWithNulls: string, xdynamicWithNulls: dynamic)"
            .format(cls.test_table),
        )
        cls.client.execute(
            cls.test_db,
            ".create table {0} ingestion json mapping 'JsonMapping' {1}".
            format(cls.test_table, cls.test_table_json_mapping_reference()))
Example 10
def main():
    
    args = parse_arguments()
    
    # connect to database
    kcsb_ingest = KustoConnectionStringBuilder.with_az_cli_authentication(cluster_ingest)
    ingest_client = QueuedIngestClient(kcsb_ingest)
    date_time = args.datetime
    identifier = get_identifier(date_time, args.commit_hash, args.trt_version, args.branch)
    
    try:
        result_file = args.report_folder

        folders = os.listdir(result_file)
        os.chdir(result_file)

        tables = [fail_name, memory_name, latency_name, status_name, latency_over_time_name, specs_name, session_name]
        table_results = {}
        for table_name in tables:
            table_results[table_name] = pd.DataFrame()

        for model_group in folders:
            os.chdir(model_group)
            csv_filenames = os.listdir()
            for csv in csv_filenames:
                table = parse_csv(csv)
                if session_name in csv: 
                    table_results[session_name] = table_results[session_name].append(get_session(table, model_group), ignore_index=True)
                if specs_name in csv: 
                    table_results[specs_name] = table_results[specs_name].append(get_specs(table, args.branch, args.commit_hash, date_time), ignore_index=True)
                if fail_name in csv:
                    table_results[fail_name] = table_results[fail_name].append(get_failures(table, model_group), ignore_index=True)
                if latency_name in csv:
                    table_results[memory_name] = table_results[memory_name].append(get_memory(table, model_group), ignore_index=True)
                    table_results[latency_name] = table_results[latency_name].append(get_latency(table, model_group), ignore_index=True)
                    table_results[latency_over_time_name] = table_results[latency_over_time_name].append(get_latency_over_time(args.commit_hash, args.report_url, args.branch, table_results[latency_name]), ignore_index=True)
                if status_name in csv: 
                    table_results[status_name] = table_results[status_name].append(get_status(table, model_group), ignore_index=True)
            os.chdir(result_file)
        for table in tables: 
            print('writing ' + table + ' to database')
            db_table_name = 'ep_model_' + table
            write_table(ingest_client, table_results[table], db_table_name, date_time, identifier)

    except BaseException as e: 
        print(str(e))
        sys.exit(1)
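One caveat for Examples 10 and 15 (below): DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, so on current pandas each accumulation step would use pandas.concat instead, e.g.:

import pandas as pd

# pandas >= 2.0 equivalent of table_results[fail_name].append(..., ignore_index=True)
table_results[fail_name] = pd.concat(
    [table_results[fail_name], get_failures(table, model_group)],
    ignore_index=True,
)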
Example 11
    def test_isempty(self):
        client = QueuedIngestClient("some-cluster")

        fake_peek = fake_peek_factory(lambda queue_name, num_messages=1: [
            mock_message(success=True) for _ in range(0, num_messages)
        ] if "qs" in queue_name else [])
        with mock.patch.object(client._resource_manager,
                               "get_successful_ingestions_queues"
                               ) as mocked_get_success_qs, mock.patch.object(
                                   client._resource_manager,
                                   "get_failed_ingestions_queues"
                               ) as mocked_get_failed_qs, mock.patch.object(
                                   QueueClient,
                                   "peek_messages",
                                   autospec=True,
                                   side_effect=fake_peek) as q_mock:
            fake_failed_queue = _ResourceUri(
                "mocked_storage_account1",
                OBJECT_TYPE,
                "queue",
                "mocked_qf_name",
                ENDPOINT_SUFFIX,
            )
            fake_success_queue = _ResourceUri(
                "mocked_storage_account2",
                OBJECT_TYPE,
                "queue",
                "mocked_qs_name",
                ENDPOINT_SUFFIX,
            )

            mocked_get_success_qs.return_value = [fake_success_queue]
            mocked_get_failed_qs.return_value = [fake_failed_queue]

            qs = KustoIngestStatusQueues(client)

            assert qs.success.is_empty() is False
            assert qs.failure.is_empty() is True

            assert q_mock.call_count == 2
            assert q_mock.call_args_list[0][1]["max_messages"] == 2
            assert q_mock.call_args_list[1][1]["max_messages"] == 2
Example 12
def main():
    print("Kusto sample app is starting...")

    app = KustoSampleApp()
    app.load_configs(app.CONFIG_FILE_NAME)

    if app.config.authentication_mode == "UserPrompt":
        app.wait_for_user_to_proceed(
            "You will be prompted *twice* for credentials during this script. Please return to the console after authenticating."
        )

    kusto_connection_string = Utils.Authentication.generate_connection_string(
        app.config.kusto_uri, app.config.authentication_mode)
    ingest_connection_string = Utils.Authentication.generate_connection_string(
        app.config.ingest_uri, app.config.authentication_mode)

    # Tip: Avoid creating a new Kusto/ingest client for each use. Instead, create the clients once and reuse them.
    if not kusto_connection_string or not ingest_connection_string:
        Utils.error_handler(
            "Connection String error. Please validate your configuration file."
        )
    else:
        kusto_client = KustoClient(kusto_connection_string)
        ingest_client = QueuedIngestClient(ingest_connection_string)

        app.pre_ingestion_querying(app.config, kusto_client)

        if app.config.ingest_data:
            app.ingestion(app.config, kusto_client, ingest_client)

        if app.config.query_data:
            app.post_ingestion_querying(kusto_client, app.config.database_name,
                                        app.config.table_name,
                                        app.config.ingest_data)

    print("\nKusto sample app done")
Example 13
class PythonAdxOutput(IOutput):
    """
    An output component that ingests events to Azure Data Explorer (ADX) using queued ingestion.
    
    ...

    Attributes
    ----------
    cluster : str
        Azure Data Explorer (ADX) cluster address. eg, 'CDOC.kusto.windows.net'
    database : str
        Azure Data Explorer (ADX) database name. eg, 'TestDb'
    table : str
        Azure Data Explorer (ADX) table name. eg, 'OutputTable'
    clientId : str
        Azure Data Explorer (ADX) client Id that has permissions to access ADX.
    clientSecret : str
        Azure Data Explorer (ADX) access key. Used along with client Id.
    authority : str
        Azure Data Explorer (ADX) authority. Optional. When not specified, 'microsoft.com' is used.
    resetTable : bool
        Default is False. If True, the existing data in the destination table is dropped before new data is logged.
    """
    __namespace__ = "KqlPython"

    def __init__(self, cluster, database, table, clientId, clientSecret, authority="microsoft.com", resetTable=False):
        """
        Parameters
        ----------
        cluster : str
            Azure Data Explorer (ADX) cluster address. eg, 'CDOC.kusto.windows.net'
        database : str
            Azure Data Explorer (ADX) database name. eg, 'TestDb'
        table : str
            Azure Data Explorer (ADX) table name. eg, 'OutputTable'
        clientId : str
            Azure Data Explorer (ADX) client Id that has permissions to access ADX.
        clientSecret : str
            Azure Data Explorer (ADX) access key. Used along with client Id.
        authority : str
            Azure Data Explorer (ADX) authority. Optional. When not specified, 'microsoft.com' is used.
        resetTable : bool
            Default is False. If True, the existing data in the destination table is dropped before new data is logged.
        """
        self.running = True
        self.batchSize = 10000
        self.flushDuration = timedelta(milliseconds=1000)
        self.lastUploadTime = datetime.utcnow()
        self.initTable = False
        self.nextBatch = list()
        self.currentBatch = None
        self.lock = threading.Lock()

        self.resetTable = resetTable
        self.database = database
        self.table = table
        self.kcsbData = KustoConnectionStringBuilder.with_aad_application_key_authentication(f"https://{cluster}:443/", clientId, clientSecret, authority)
        self.kcsbIngest = KustoConnectionStringBuilder.with_aad_application_key_authentication(f"https://ingest-{cluster}:443/", clientId, clientSecret, authority)
        self.dataClient = KustoClient(self.kcsbData)
        self.ingestClient = QueuedIngestClient(self.kcsbIngest)
        self.ingestionProps = IngestionProperties(database=database, table=table)

    def KqlOutputAction(self, kqlOutput: KqlOutput):
        """Outputs events that have been processed by a KQL query"""
        self.OutputAction(kqlOutput.Output)

    def OutputAction(self, dictOutput: Dictionary):
        """Outputs events either to console or to custom function"""
        try:
            if self.running:
                # Convert C# Dictionary to Python dict
                txt = JsonConvert.SerializeObject(dictOutput)
                newEvent = json.loads(txt)
                
                # Initialize table
                if not self.initTable:
                    self.CreateOrResetTable(newEvent)
                    self.initTable = True

                # Check if it's time to upload a batch
                if (len(self.nextBatch) >= self.batchSize) or (datetime.utcnow() > self.lastUploadTime + self.flushDuration):
                    self.UploadBatch()

                self.nextBatch.append(newEvent)
        except:
            self.running = False
            print(sys.exc_info())
            traceback.print_exc()

    def OutputError(self, error):
        """Outputs errors to console"""
        self.running = False 
        print(error)
    
    def OutputCompleted(self):
        """Signals the end of the input event stream"""
        if self.running:
            self.UploadBatch()
        self.running = False

    def Stop(self):
        """Signals end of program"""
        print('\nCompleted!')
        print('\nThank you for using Real-time KQL!')

    def UploadBatch(self):
        """Ingests batch of events to Kusto using queued ingestion"""
        self.lock.acquire()
        try:
            if self.currentBatch is not None:
                raise Exception('Upload must not be called before the batch currently being uploaded is completed')

            self.currentBatch = self.nextBatch
            self.nextBatch = list()

            if len(self.currentBatch) > 0:
                df = DataFrame(self.currentBatch)
                self.ingestClient.ingest_from_dataframe(df, ingestion_properties=self.ingestionProps)
                print(f"{len(self.currentBatch)},", end = " ")

            self.currentBatch = None
            self.lastUploadTime = datetime.utcnow()
        except:
            self.running = False
            print(sys.exc_info())
            traceback.print_exc()
        finally:
            self.lock.release()
    
    def CreateOrResetTable(self, data):
        """Creates or resets ADX table"""
        if self.resetTable:
            # Dropping table
            self.dataClient.execute(self.database, f".drop table {self.table} ifexists")

        # Create-merge table
        tableMapping = "("
        for item in data:
            tableMapping += f"{item}: {self.GetColumnType(data[item])}, "
        tableMapping = tableMapping[:-2] + ")"
        createMergeTableCommand = f".create-merge table {self.table} " + tableMapping
        self.dataClient.execute(self.database, createMergeTableCommand)

    def GetColumnType(self, item):
        """Returns Kusto data type string equivalent of python object"""
        if isinstance(item, str):
            return "string"
        elif isinstance(item, bool):
            return "bool"
        elif isinstance(item, datetime):
            return "datetime"
        elif isinstance(item, timedelta):
            return "timespan"
        elif isinstance(item, (int, bytes, bytearray)):
            return "int"
        elif isinstance(item, float):
            return "real"
        else:
            return "dynamic"
Example 14
kcsb = KustoConnectionStringBuilder.with_aad_managed_service_identity_authentication(
    cluster, client_id=user_assigned_client_id)

# In case you want to authenticate with AAD username and password
username = "******"
password = "******"
kcsb = KustoConnectionStringBuilder.with_aad_user_password_authentication(
    cluster, username, password, authority_id)

# In case you want to authenticate with AAD device code.
# Please note that if you choose this option, you'll need to authenticate for every new instance that is initialized.
# It is highly recommended to create one instance and use it for all of your queries.
kcsb = KustoConnectionStringBuilder.with_aad_device_authentication(cluster)

# The authentication method will be taken from the chosen KustoConnectionStringBuilder.
client = QueuedIngestClient(kcsb)

# there are more options for authenticating - see azure-kusto-data samples

##################################################################
##                        INGESTION                             ##
##################################################################

# There are many useful properties; make sure to go over the docs and check them out.
ingestion_props = IngestionProperties(
    database="{database_name}",
    table="{table_name}",
    data_format=DataFormat.CSV,
    # in case status updates for success are also required:
    # report_level=ReportLevel.FailuresAndSuccesses,
    # in case a mapping is required, reference one that already exists on the table:
    # ingestion_mapping_reference="{mapping_name}",
)
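The sample would then queue data for ingestion using these properties; a minimal hedged sketch using a local file (file name and size hint are illustrative):

from azure.kusto.ingest import FileDescriptor

# Passing the uncompressed size up front spares the service an extra size lookup.
file_descriptor = FileDescriptor("dataset.csv", 1000)
client.ingest_from_file(file_descriptor, ingestion_properties=ingestion_props)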
Example 15
def main():
    """
    Entry point of this script. Uploads data produced by benchmarking scripts to the database.
    """

    args = parse_arguments()

    # connect to database
    kcsb_ingest = KustoConnectionStringBuilder.with_az_cli_authentication(
        CLUSTER_INGEST)
    ingest_client = QueuedIngestClient(kcsb_ingest)
    identifier = get_identifier(args.commit_datetime, args.commit_hash,
                                args.trt_version, args.branch)
    upload_time = datetime.datetime.now(tz=datetime.timezone.utc).replace(
        microsecond=0)

    try:
        result_file = args.report_folder

        folders = os.listdir(result_file)
        os.chdir(result_file)

        tables = [
            fail_name,
            memory_name,
            latency_name,
            status_name,
            latency_over_time_name,
            specs_name,
            session_name,
        ]
        table_results = {}
        for table_name in tables:
            table_results[table_name] = pd.DataFrame()

        for model_group in folders:
            os.chdir(model_group)
            csv_filenames = os.listdir()
            for csv in csv_filenames:
                table = pd.read_csv(csv)
                if session_name in csv:
                    table_results[session_name] = table_results[
                        session_name].append(get_session(table, model_group),
                                             ignore_index=True)
                elif specs_name in csv:
                    table_results[specs_name] = table_results[
                        specs_name].append(
                            get_specs(table, args.branch, args.commit_hash,
                                      args.commit_datetime),
                            ignore_index=True,
                        )
                elif fail_name in csv:
                    table_results[fail_name] = table_results[fail_name].append(
                        get_failures(table, model_group), ignore_index=True)
                elif latency_name in csv:
                    table_results[memory_name] = table_results[
                        memory_name].append(get_memory(table, model_group),
                                            ignore_index=True)
                    table_results[latency_name] = table_results[
                        latency_name].append(get_latency(table, model_group),
                                             ignore_index=True)
                    if not table_results[latency_name].empty:
                        table_results[latency_over_time_name] = table_results[
                            latency_over_time_name].append(
                                get_latency_over_time(
                                    args.commit_hash,
                                    args.report_url,
                                    args.branch,
                                    table_results[latency_name],
                                ),
                                ignore_index=True,
                            )
                elif status_name in csv:
                    table_results[status_name] = table_results[
                        status_name].append(get_status(table, model_group),
                                            ignore_index=True)
            os.chdir(result_file)
        for table in tables:
            print("writing " + table + " to database")
            db_table_name = "ep_model_" + table
            write_table(
                ingest_client,
                table_results[table],
                db_table_name,
                upload_time,
                identifier,
            )

    except BaseException as e:
        print(str(e))
        sys.exit(1)
Example 16
    def test_peek(self):
        client = QueuedIngestClient("some-cluster")

        fake_peek = fake_peek_factory(lambda queue_name, num_messages=1: [
            mock_message(success=True) if "qs" in queue_name else mock_message(
                success=False) for _ in range(0, num_messages)
        ])

        with mock.patch.object(client._resource_manager,
                               "get_successful_ingestions_queues"
                               ) as mocked_get_success_qs, mock.patch.object(
                                   client._resource_manager,
                                   "get_failed_ingestions_queues"
                               ) as mocked_get_failed_qs, mock.patch.object(
                                   QueueClient,
                                   "peek_messages",
                                   autospec=True,
                                   side_effect=fake_peek) as q_mock:

            fake_failed_queue1 = _ResourceUri(
                "mocked_storage_account_f1",
                OBJECT_TYPE,
                "queue",
                "mocked_qf_1_name",
                ENDPOINT_SUFFIX,
            )
            fake_failed_queue2 = _ResourceUri(
                "mocked_storage_account_f2",
                OBJECT_TYPE,
                "queue",
                "mocked_qf_2_name",
                ENDPOINT_SUFFIX,
            )
            fake_success_queue = _ResourceUri(
                "mocked_storage_account2",
                OBJECT_TYPE,
                "queue",
                "mocked_qs_name",
                ENDPOINT_SUFFIX,
            )

            mocked_get_success_qs.return_value = [fake_success_queue]
            mocked_get_failed_qs.return_value = [
                fake_failed_queue1, fake_failed_queue2
            ]

            qs = KustoIngestStatusQueues(client)

            peek_success_actual = qs.success.peek()
            peek_failure_actual = qs.failure.peek(6)

            assert len(peek_success_actual) == 1

            for m in peek_failure_actual:
                assert isinstance(m, FailureMessage) is True

            for m in peek_success_actual:
                assert isinstance(m, SuccessMessage) is True

            assert len(peek_failure_actual) == 6

            actual = {}

            assert len(QueueClient.peek_messages.call_args_list) == 3

            for call_args in q_mock.call_args_list:
                actual[call_args[0][0].queue_name] = actual.get(
                    call_args[0][0].queue_name,
                    0) + call_args[1]["max_messages"]

            assert actual[fake_failed_queue2.object_name] == 4
            assert actual[fake_failed_queue1.object_name] == 4
            assert actual[fake_success_queue.object_name] == 2
Example 17
    def test_pop(self):
        client = QueuedIngestClient("some-cluster")

        fake_receive = fake_receive_factory(
            lambda queue_name, num_messages=1: [
                mock_message(success=True) if "qs" in queue_name else
                mock_message(success=False) for _ in range(0, num_messages)
            ])

        with mock.patch.object(client._resource_manager,
                               "get_successful_ingestions_queues"
                               ) as mocked_get_success_qs, mock.patch.object(
                                   client._resource_manager,
                                   "get_failed_ingestions_queues"
                               ) as mocked_get_failed_qs, mock.patch.object(
                                   QueueClient,
                                   "receive_messages",
                                   autospec=True,
                                   side_effect=fake_receive,
                               ) as q_receive_mock, mock.patch.object(
                                   QueueClient,
                                   "delete_message",
                                   return_value=None) as q_del_mock:

            fake_failed_queue1 = _ResourceUri(
                "mocked_storage_account_f1",
                OBJECT_TYPE,
                "queue",
                "mocked_qf_1_name",
                ENDPOINT_SUFFIX,
            )
            fake_failed_queue2 = _ResourceUri(
                "mocked_storage_account_f2",
                OBJECT_TYPE,
                "queue",
                "mocked_qf_2_name",
                ENDPOINT_SUFFIX,
            )
            fake_success_queue = _ResourceUri(
                "mocked_storage_account2",
                OBJECT_TYPE,
                "queue",
                "mocked_qs_name",
                ENDPOINT_SUFFIX,
            )

            mocked_get_success_qs.return_value = [fake_success_queue]
            mocked_get_failed_qs.return_value = [
                fake_failed_queue1, fake_failed_queue2
            ]

            qs = KustoIngestStatusQueues(client)

            get_success_actual = qs.success.pop()
            get_failure_actual = qs.failure.pop(6)

            assert len(get_success_actual) == 1
            assert len(get_failure_actual) == 6

            for m in get_failure_actual:
                assert isinstance(m, FailureMessage)

            for m in get_success_actual:
                assert isinstance(m, SuccessMessage)

            assert q_receive_mock.call_count == 3
            assert q_del_mock.call_count == len(get_success_actual) + len(
                get_failure_actual)

            assert q_receive_mock.call_args_list[0][1][
                "messages_per_page"] == 2

            actual = {
                q_receive_mock.call_args_list[1][0][0].queue_name:
                q_receive_mock.call_args_list[1][1]["messages_per_page"],
                q_receive_mock.call_args_list[2][0][0].queue_name:
                q_receive_mock.call_args_list[2][1]["messages_per_page"],
            }

            assert actual[fake_failed_queue2.object_name] == 4
            assert actual[fake_failed_queue1.object_name] == 4
Example 18
    def test_init(self):
        client = QueuedIngestClient("some-cluster")
        qs = KustoIngestStatusQueues(client)

        assert qs.success.message_cls == SuccessMessage
        assert qs.failure.message_cls == FailureMessage
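Tying the tests above back to usage: a hedged end-to-end sketch of polling the status queues after ingestion (this requires report_level=ReportLevel.FailuresAndSuccesses on the IngestionProperties, as in Example 1; ingest_client stands in for a QueuedIngestClient):

from azure.kusto.ingest.status import KustoIngestStatusQueues

qs = KustoIngestStatusQueues(ingest_client)
if not qs.failure.is_empty():
    for message in qs.failure.pop(10):
        print("Ingestion failed:", message)
for message in qs.success.pop(10):
    print("Ingestion succeeded:", message)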