def write_to_db(binary_size_data, args):
    # connect to database
    cluster = "https://ingest-onnxruntimedashboarddb.southcentralus.kusto.windows.net"
    kcsb = KustoConnectionStringBuilder.with_az_cli_authentication(cluster)
    # The authentication method will be taken from the chosen KustoConnectionStringBuilder.
    client = QueuedIngestClient(kcsb)
    fields = ["build_time", "build_id", "build_project", "commit_id", "os", "arch", "build_config", "size", "Branch"]
    now_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    branch_name = os.environ.get("BUILD_SOURCEBRANCHNAME", "main")
    rows = []
    for row in binary_size_data:
        rows.append(
            [
                now_str,
                args.build_id,
                args.build_project,
                args.commit_hash,
                row["os"],
                row["arch"],
                row["build_config"],
                row["size"],
                branch_name.lower(),
            ]
        )
    ingestion_props = IngestionProperties(
        database="powerbi",
        table="binary_size",
        data_format=DataFormat.CSV,
        report_level=ReportLevel.FailuresAndSuccesses,
    )
    df = pandas.DataFrame(data=rows, columns=fields)
    client.ingest_from_dataframe(df, ingestion_properties=ingestion_props)
def write_to_db(coverage_data, args):
    # connect to database
    cluster = "https://ingest-onnxruntimedashboarddb.southcentralus.kusto.windows.net"
    kcsb = KustoConnectionStringBuilder.with_az_cli_authentication(cluster)
    # The authentication method will be taken from the chosen KustoConnectionStringBuilder.
    client = QueuedIngestClient(kcsb)
    fields = [
        "UploadTime",
        "CommitId",
        "Coverage",
        "LinesCovered",
        "TotalLines",
        "OS",
        "Arch",
        "BuildConfig",
        "ReportURL",
        "Branch",
    ]
    now_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    rows = [[
        now_str,
        args.commit_hash,
        coverage_data["coverage"],
        coverage_data["lines_covered"],
        coverage_data["lines_valid"],
        args.os.lower(),
        args.arch.lower(),
        args.build_config.lower(),
        args.report_url.lower(),
        args.branch.lower(),
    ]]
    ingestion_props = IngestionProperties(
        database="powerbi",
        table="test_coverage",
        data_format=DataFormat.CSV,
        report_level=ReportLevel.FailuresAndSuccesses,
    )
    df = pandas.DataFrame(data=rows, columns=fields)
    client.ingest_from_dataframe(df, ingestion_properties=ingestion_props)
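# Hedged usage sketch (not part of the original script): one way the write_to_db helper above
# could be invoked. The argparse setup is not shown in the source, so this Namespace and the
# sample coverage_data values are illustrative assumptions only.
import argparse

coverage_data = {"coverage": 0.87, "lines_covered": 87000, "lines_valid": 100000}
args = argparse.Namespace(
    commit_hash="abc123",  # hypothetical commit id
    os="Linux",
    arch="x64",
    build_config="Release",
    report_url="https://example.invalid/coverage-report",  # placeholder URL
    branch="main",
)
write_to_db(coverage_data, args)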
def ingest_from_blob(cls, ingest_client: QueuedIngestClient, database_name: str, table_name: str, blob_url: str, data_format: DataFormat, mapping_name: str = None) -> None:
    """
    Ingest data from a blob.
    :param ingest_client: Client to ingest data
    :param database_name: DB name
    :param table_name: Table name
    :param blob_url: Blob URI
    :param data_format: Given data format
    :param mapping_name: Desired mapping name
    """
    ingestion_properties = cls.create_ingestion_properties(database_name, table_name, data_format, mapping_name)

    # Tip 1: For optimal ingestion batching and performance, specify the uncompressed data size in the file
    # descriptor instead of the default below of 0. Otherwise, the service will determine the file size,
    # requiring an additional s2s call, and it may not be accurate for compressed files.
    # Tip 2: To correlate between ingestion operations in your applications and Kusto, set the source ID and log it somewhere.
    blob_descriptor = BlobDescriptor(blob_url, size=0, source_id=str(uuid.uuid4()))
    ingest_client.ingest_from_blob(blob_descriptor, ingestion_properties=ingestion_properties)
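# Hedged sketch of Tip 1 above (not part of the original helper): if the caller already knows the
# uncompressed size of the blob, it can be passed to BlobDescriptor so the service does not need an
# extra storage call to determine it. `uncompressed_size_bytes` is an assumed, caller-supplied value.
import uuid

from azure.kusto.ingest import BlobDescriptor

uncompressed_size_bytes = 10 * 1024 * 1024  # e.g. ~10 MB of raw CSV behind the blob URL
blob_descriptor = BlobDescriptor(
    blob_url,                       # same blob URL as in the helper above
    size=uncompressed_size_bytes,   # raw (uncompressed) size improves batching decisions
    source_id=str(uuid.uuid4()),    # Tip 2: log this id to correlate with Kusto ingestion results
)
ingest_client.ingest_from_blob(blob_descriptor, ingestion_properties=ingestion_properties)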
def test_pop_unbalanced_queues(self):
    client = QueuedIngestClient("some-cluster")
    fake_receive = fake_receive_factory(
        lambda queue_name, messages_per_page=1: [mock_message(success=False) for _ in range(0, messages_per_page)] if "1" in queue_name else []
    )

    with mock.patch.object(client._resource_manager, "get_successful_ingestions_queues"), mock.patch.object(
        client._resource_manager, "get_failed_ingestions_queues"
    ) as mocked_get_failed_qs, mock.patch.object(
        QueueClient,
        "receive_messages",
        autospec=True,
        side_effect=fake_receive,
    ) as q_receive_mock, mock.patch.object(QueueClient, "delete_message", return_value=None):
        fake_failed_queue1 = _ResourceUri(
            "mocked_storage_account_f1",
            OBJECT_TYPE,
            "queue",
            "mocked_qf_1_name",
            ENDPOINT_SUFFIX,
        )
        fake_failed_queue2 = _ResourceUri(
            "mocked_storage_account_f2",
            OBJECT_TYPE,
            "queue",
            "mocked_qf_2_name",
            ENDPOINT_SUFFIX,
        )
        mocked_get_failed_qs.return_value = [fake_failed_queue1, fake_failed_queue2]

        qs = KustoIngestStatusQueues(client)

        get_failure_actual = qs.failure.pop(6)
        assert len(get_failure_actual) == 6

        for m in get_failure_actual:
            assert isinstance(m, FailureMessage)

        assert q_receive_mock.call_count == 3

        actual = {}
        for call_args in q_receive_mock.call_args_list:
            actual[call_args[0][0].queue_name] = actual.get(call_args[0][0].queue_name, 0) + call_args[1]["messages_per_page"]

        assert actual[fake_failed_queue2.object_name] + actual[fake_failed_queue1.object_name] == (4 + 4 + 6)
def test_sanity_ingest_from_file(self, mock_uuid, mock_put_message_in_queue, mock_upload_blob_from_stream, mock_aad):
    responses.add_callback(
        responses.POST, "https://ingest-somecluster.kusto.windows.net/v1/rest/mgmt", callback=request_callback, content_type="application/json"
    )

    ingest_client = QueuedIngestClient("https://ingest-somecluster.kusto.windows.net")
    ingestion_properties = IngestionProperties(database="database", table="table", data_format=DataFormat.CSV)

    # ensure test can work when executed from within directories
    current_dir = os.getcwd()
    path_parts = ["azure-kusto-ingest", "tests", "input", "dataset.csv"]
    missing_path_parts = []
    for path_part in path_parts:
        if path_part not in current_dir:
            missing_path_parts.append(path_part)

    file_path = os.path.join(current_dir, *missing_path_parts)

    ingest_client.ingest_from_file(file_path, ingestion_properties=ingestion_properties)

    # mock_put_message_in_queue
    assert mock_put_message_in_queue.call_count == 1

    put_message_in_queue_mock_kwargs = mock_put_message_in_queue.call_args_list[0][1]

    queued_message_json = json.loads(put_message_in_queue_mock_kwargs["content"])
    expected_url = "https://storageaccount.blob.core.windows.net/tempstorage/database__table__1111-111111-111111-1111__dataset.csv.gz?"
    # mock_upload_blob_from_stream
    # not checking the query string because it can change order, just checking it's there
    assert queued_message_json["BlobPath"].startswith(expected_url) is True
    assert len(queued_message_json["BlobPath"]) > len(expected_url)
    assert queued_message_json["DatabaseName"] == "database"
    assert queued_message_json["IgnoreSizeLimit"] is False
    assert queued_message_json["AdditionalProperties"]["format"] == "csv"
    assert queued_message_json["FlushImmediately"] is False
    assert queued_message_json["TableName"] == "table"
    assert queued_message_json["RawDataSize"] > 0
    assert queued_message_json["RetainBlobOnSuccess"] is True

    upload_blob_kwargs = mock_upload_blob_from_stream.call_args_list[0][1]

    assert type(upload_blob_kwargs["data"]) == io.BytesIO
def test_simple_ingest_from_dataframe(self, mock_pid, mock_time, mock_uuid, mock_put_message_in_queue, mock_upload_blob_from_stream):
    responses.add_callback(
        responses.POST, "https://ingest-somecluster.kusto.windows.net/v1/rest/mgmt", callback=request_callback, content_type="application/json"
    )

    ingest_client = QueuedIngestClient("https://ingest-somecluster.kusto.windows.net")
    ingestion_properties = IngestionProperties(database="database", table="table", data_format=DataFormat.CSV)

    from pandas import DataFrame

    fields = ["id", "name", "value"]
    rows = [[1, "abc", 15.3], [2, "cde", 99.9]]
    df = DataFrame(data=rows, columns=fields)

    ingest_client.ingest_from_dataframe(df, ingestion_properties=ingestion_properties)

    # mock_put_message_in_queue
    assert mock_put_message_in_queue.call_count == 1

    put_message_in_queue_mock_kwargs = mock_put_message_in_queue.call_args_list[0][1]

    queued_message_json = json.loads(put_message_in_queue_mock_kwargs["content"])
    expected_url = "https://storageaccount.blob.core.windows.net/tempstorage/database__table__1111-111111-111111-1111__df_{0}_100_1111-111111-111111-1111.csv.gz?".format(
        id(df)
    )
    # mock_upload_blob_from_stream
    # not checking the query string because it can change order, just checking it's there
    assert queued_message_json["BlobPath"].startswith(expected_url) is True
    assert len(queued_message_json["BlobPath"]) > len(expected_url)
    assert queued_message_json["DatabaseName"] == "database"
    assert queued_message_json["IgnoreSizeLimit"] is False
    assert queued_message_json["AdditionalProperties"]["format"] == "csv"
    assert queued_message_json["FlushImmediately"] is False
    assert queued_message_json["TableName"] == "table"
    assert queued_message_json["RawDataSize"] > 0
    assert queued_message_json["RetainBlobOnSuccess"] is True

    upload_blob_kwargs = mock_upload_blob_from_stream.call_args_list[0][1]

    assert type(upload_blob_kwargs["data"]) == io.BufferedReader
def test_ingest_from_file_wrong_endpoint(self):
    responses.add_callback(
        responses.POST, "https://somecluster.kusto.windows.net/v1/rest/mgmt", callback=request_error_callback, content_type="application/json"
    )

    ingest_client = QueuedIngestClient("https://somecluster.kusto.windows.net")
    ingestion_properties = IngestionProperties(database="database", table="table", data_format=DataFormat.CSV)

    current_dir = os.getcwd()
    path_parts = ["azure-kusto-ingest", "tests", "input", "dataset.csv"]
    missing_path_parts = []
    for path_part in path_parts:
        if path_part not in current_dir:
            missing_path_parts.append(path_part)

    file_path = os.path.join(current_dir, *missing_path_parts)

    with self.assertRaises(KustoInvalidEndpointError) as ex:
        ingest_client.ingest_from_file(file_path, ingestion_properties=ingestion_properties)

    self.assertEqual(
        ex.exception.args[0],
        "You are using 'DataManagement' client type, but the provided endpoint is of ServiceType 'Engine'. Initialize the client with the appropriate endpoint URI: 'https://ingest-somecluster.kusto.windows.net'",
        "Expected exception was not raised",
    )
def __init__(self, cluster, database, table, clientId, clientSecret, authority="microsoft.com", resetTable=False):
    """
    Parameters
    ----------
    cluster : str
        Azure Data Explorer (ADX) cluster address. e.g., 'CDOC.kusto.windows.net'
    database : str
        Azure Data Explorer (ADX) database name. e.g., 'TestDb'
    table : str
        Azure Data Explorer (ADX) table name. e.g., 'OutputTable'
    clientId : str
        Azure Data Explorer (ADX) client Id that has permissions to access ADX.
    clientSecret : str
        Azure Data Explorer (ADX) access key. Used along with client Id.
    authority : str
        Azure Data Explorer (ADX) authority. Optional. When not specified, 'microsoft.com' is used.
    resetTable : bool
        Default is False. If True, the existing data in the destination table is dropped before new data is logged.
    """
    self.running = True
    self.batchSize = 10000
    self.flushDuration = timedelta(milliseconds=1000)
    self.lastUploadTime = datetime.utcnow()
    self.initTable = False
    self.nextBatch = list()
    self.currentBatch = None
    self.lock = threading.Lock()
    self.resetTable = resetTable
    self.database = database
    self.table = table
    self.kcsbData = KustoConnectionStringBuilder.with_aad_application_key_authentication(f"https://{cluster}:443/", clientId, clientSecret, authority)
    self.kcsbIngest = KustoConnectionStringBuilder.with_aad_application_key_authentication(f"https://ingest-{cluster}:443/", clientId, clientSecret, authority)
    self.dataClient = KustoClient(self.kcsbData)
    self.ingestClient = QueuedIngestClient(self.kcsbIngest)
    self.ingestionProps = IngestionProperties(database=database, table=table)
def setup_class(cls):
    # DM CS can be composed from engine CS
    cls.engine_cs = os.environ.get("ENGINE_CONNECTION_STRING")
    cls.dm_cs = os.environ.get("DM_CONNECTION_STRING") or cls.engine_cs.replace("//", "//ingest-")
    cls.app_id = os.environ.get("APP_ID")
    cls.app_key = os.environ.get("APP_KEY")
    cls.auth_id = os.environ.get("AUTH_ID")
    cls.test_db = os.environ.get("TEST_DATABASE")

    if not all([cls.engine_cs, cls.dm_cs, cls.app_id, cls.app_key, cls.auth_id, cls.test_db]):
        raise unittest.SkipTest("E2E environment is missing")

    # Init clients
    python_version = "_".join([str(v) for v in sys.version_info[:3]])
    cls.test_table = "python_test_{0}_{1}_{2}".format(python_version, str(int(time.time())), random.randint(1, 100000))

    cls.client = KustoClient(cls.engine_kcsb_from_env())
    cls.ingest_client = QueuedIngestClient(cls.dm_kcsb_from_env())
    cls.streaming_ingest_client = KustoStreamingIngestClient(cls.engine_kcsb_from_env())

    cls.input_folder_path = cls.get_file_path()

    cls.csv_file_path = os.path.join(cls.input_folder_path, "dataset.csv")
    cls.tsv_file_path = os.path.join(cls.input_folder_path, "dataset.tsv")
    cls.zipped_csv_file_path = os.path.join(cls.input_folder_path, "dataset.csv.gz")
    cls.json_file_path = os.path.join(cls.input_folder_path, "dataset.json")
    cls.zipped_json_file_path = os.path.join(cls.input_folder_path, "dataset.jsonz.gz")

    cls.current_count = 0

    cls.client.execute(
        cls.test_db,
        ".create table {0} (rownumber: int, rowguid: string, xdouble: real, xfloat: real, xbool: bool, xint16: int, xint32: int, xint64: long, xuint8: long, xuint16: long, xuint32: long, xuint64: long, xdate: datetime, xsmalltext: string, xtext: string, xnumberAsText: string, xtime: timespan, xtextWithNulls: string, xdynamicWithNulls: dynamic)".format(
            cls.test_table
        ),
    )
    cls.client.execute(
        cls.test_db,
        ".create table {0} ingestion json mapping 'JsonMapping' {1}".format(cls.test_table, cls.test_table_json_mapping_reference()),
    )
def main():
    args = parse_arguments()

    # connect to database
    kcsb_ingest = KustoConnectionStringBuilder.with_az_cli_authentication(cluster_ingest)
    ingest_client = QueuedIngestClient(kcsb_ingest)
    date_time = args.datetime
    identifier = get_identifier(date_time, args.commit_hash, args.trt_version, args.branch)

    try:
        result_file = args.report_folder
        folders = os.listdir(result_file)
        os.chdir(result_file)

        tables = [fail_name, memory_name, latency_name, status_name, latency_over_time_name, specs_name, session_name]
        table_results = {}
        for table_name in tables:
            table_results[table_name] = pd.DataFrame()

        for model_group in folders:
            os.chdir(model_group)
            csv_filenames = os.listdir()
            for csv in csv_filenames:
                table = parse_csv(csv)
                if session_name in csv:
                    table_results[session_name] = table_results[session_name].append(get_session(table, model_group), ignore_index=True)
                if specs_name in csv:
                    table_results[specs_name] = table_results[specs_name].append(get_specs(table, args.branch, args.commit_hash, date_time), ignore_index=True)
                if fail_name in csv:
                    table_results[fail_name] = table_results[fail_name].append(get_failures(table, model_group), ignore_index=True)
                if latency_name in csv:
                    table_results[memory_name] = table_results[memory_name].append(get_memory(table, model_group), ignore_index=True)
                    table_results[latency_name] = table_results[latency_name].append(get_latency(table, model_group), ignore_index=True)
                    table_results[latency_over_time_name] = table_results[latency_over_time_name].append(
                        get_latency_over_time(args.commit_hash, args.report_url, args.branch, table_results[latency_name]), ignore_index=True
                    )
                if status_name in csv:
                    table_results[status_name] = table_results[status_name].append(get_status(table, model_group), ignore_index=True)
            os.chdir(result_file)

        for table in tables:
            print("writing " + table + " to database")
            db_table_name = "ep_model_" + table
            write_table(ingest_client, table_results[table], db_table_name, date_time, identifier)
    except BaseException as e:
        print(str(e))
        sys.exit(1)
def test_isempty(self):
    client = QueuedIngestClient("some-cluster")
    fake_peek = fake_peek_factory(
        lambda queue_name, num_messages=1: [mock_message(success=True) for _ in range(0, num_messages)] if "qs" in queue_name else []
    )
    with mock.patch.object(client._resource_manager, "get_successful_ingestions_queues") as mocked_get_success_qs, mock.patch.object(
        client._resource_manager, "get_failed_ingestions_queues"
    ) as mocked_get_failed_qs, mock.patch.object(QueueClient, "peek_messages", autospec=True, side_effect=fake_peek) as q_mock:
        fake_failed_queue = _ResourceUri(
            "mocked_storage_account1",
            OBJECT_TYPE,
            "queue",
            "mocked_qf_name",
            ENDPOINT_SUFFIX,
        )
        fake_success_queue = _ResourceUri(
            "mocked_storage_account2",
            OBJECT_TYPE,
            "queue",
            "mocked_qs_name",
            ENDPOINT_SUFFIX,
        )

        mocked_get_success_qs.return_value = [fake_success_queue]
        mocked_get_failed_qs.return_value = [fake_failed_queue]

        qs = KustoIngestStatusQueues(client)

        assert qs.success.is_empty() is False
        assert qs.failure.is_empty() is True

        assert q_mock.call_count == 2
        assert q_mock.call_args_list[0][1]["max_messages"] == 2
        assert q_mock.call_args_list[1][1]["max_messages"] == 2
def main(): print("Kusto sample app is starting...") app = KustoSampleApp() app.load_configs(app.CONFIG_FILE_NAME) if app.config.authentication_mode == "UserPrompt": app.wait_for_user_to_proceed( "You will be prompted *twice* for credentials during this script. Please return to the console after authenticating." ) kusto_connection_string = Utils.Authentication.generate_connection_string( app.config.kusto_uri, app.config.authentication_mode) ingest_connection_string = Utils.Authentication.generate_connection_string( app.config.ingest_uri, app.config.authentication_mode) # Tip: Avoid creating a new Kusto/ingest client for each use.Instead, create the clients once and reuse them. if not kusto_connection_string or not ingest_connection_string: Utils.error_handler( "Connection String error. Please validate your configuration file." ) else: kusto_client = KustoClient(kusto_connection_string) ingest_client = QueuedIngestClient(ingest_connection_string) app.pre_ingestion_querying(app.config, kusto_client) if app.config.ingest_data: app.ingestion(app.config, kusto_client, ingest_client) if app.config.query_data: app.post_ingestion_querying(kusto_client, app.config.database_name, app.config.table_name, app.config.ingest_data) print("\nKusto sample app done")
class PythonAdxOutput(IOutput):
    """
    An output component that ingests events to Azure Data Explorer (ADX) using queued ingestion.

    ...

    Attributes
    ----------
    cluster : str
        Azure Data Explorer (ADX) cluster address. e.g., 'CDOC.kusto.windows.net'
    database : str
        Azure Data Explorer (ADX) database name. e.g., 'TestDb'
    table : str
        Azure Data Explorer (ADX) table name. e.g., 'OutputTable'
    clientId : str
        Azure Data Explorer (ADX) client Id that has permissions to access ADX.
    clientSecret : str
        Azure Data Explorer (ADX) access key. Used along with client Id.
    authority : str
        Azure Data Explorer (ADX) authority. Optional. When not specified, 'microsoft.com' is used.
    resetTable : bool
        Default is False. If True, the existing data in the destination table is dropped before new data is logged.
    """

    __namespace__ = "KqlPython"

    def __init__(self, cluster, database, table, clientId, clientSecret, authority="microsoft.com", resetTable=False):
        """
        Parameters
        ----------
        cluster : str
            Azure Data Explorer (ADX) cluster address. e.g., 'CDOC.kusto.windows.net'
        database : str
            Azure Data Explorer (ADX) database name. e.g., 'TestDb'
        table : str
            Azure Data Explorer (ADX) table name. e.g., 'OutputTable'
        clientId : str
            Azure Data Explorer (ADX) client Id that has permissions to access ADX.
        clientSecret : str
            Azure Data Explorer (ADX) access key. Used along with client Id.
        authority : str
            Azure Data Explorer (ADX) authority. Optional. When not specified, 'microsoft.com' is used.
        resetTable : bool
            Default is False. If True, the existing data in the destination table is dropped before new data is logged.
        """
        self.running = True
        self.batchSize = 10000
        self.flushDuration = timedelta(milliseconds=1000)
        self.lastUploadTime = datetime.utcnow()
        self.initTable = False
        self.nextBatch = list()
        self.currentBatch = None
        self.lock = threading.Lock()
        self.resetTable = resetTable
        self.database = database
        self.table = table
        self.kcsbData = KustoConnectionStringBuilder.with_aad_application_key_authentication(f"https://{cluster}:443/", clientId, clientSecret, authority)
        self.kcsbIngest = KustoConnectionStringBuilder.with_aad_application_key_authentication(f"https://ingest-{cluster}:443/", clientId, clientSecret, authority)
        self.dataClient = KustoClient(self.kcsbData)
        self.ingestClient = QueuedIngestClient(self.kcsbIngest)
        self.ingestionProps = IngestionProperties(database=database, table=table)

    def KqlOutputAction(self, kqlOutput: KqlOutput):
        """Outputs events that have been processed by a KQL query"""
        self.OutputAction(kqlOutput.Output)

    def OutputAction(self, dictOutput: Dictionary):
        """Outputs events either to console or to custom function"""
        try:
            if self.running:
                # Convert C# Dictionary to Python dict
                txt = JsonConvert.SerializeObject(dictOutput)
                newEvent = json.loads(txt)

                # Initialize table
                if not self.initTable:
                    self.CreateOrResetTable(newEvent)
                    self.initTable = True

                # Check if it's time to upload a batch
                if (len(self.nextBatch) >= self.batchSize) or (datetime.utcnow() > self.lastUploadTime + self.flushDuration):
                    self.UploadBatch()

                self.nextBatch.append(newEvent)
        except:
            self.running = False
            print(sys.exc_info())
            print(traceback.print_exc())

    def OutputError(self, error):
        """Outputs errors to console"""
        self.running = False
        print(error)

    def OutputCompleted(self):
        """Signals the end of the input event stream"""
        if self.running:
            self.UploadBatch()
        self.running = False

    def Stop(self):
        """Signals end of program"""
        print('\nCompleted!')
        print('\nThank you for using Real-time KQL!')

    def UploadBatch(self):
        """Ingests batch of events to Kusto using queued ingestion"""
        self.lock.acquire()
        try:
            if self.currentBatch is not None:
                raise Exception('Upload must not be called before the batch currently being uploaded is completed')

            self.currentBatch = self.nextBatch
            self.nextBatch = list()

            if len(self.currentBatch) > 0:
                df = DataFrame(self.currentBatch)
                self.ingestClient.ingest_from_dataframe(df, ingestion_properties=self.ingestionProps)
                print(f"{len(self.currentBatch)},", end=" ")

            self.currentBatch = None
            self.lastUploadTime = datetime.utcnow()
        except:
            self.running = False
            print(sys.exc_info())
            print(traceback.print_exc())
        finally:
            self.lock.release()

    def CreateOrResetTable(self, data):
        """Creates or resets ADX table"""
        if self.resetTable:
            # Dropping table
            self.dataClient.execute(self.database, f".drop table {self.table} ifexists")

        # Create-merge table
        tableMapping = "("
        for item in data:
            tableMapping += f"{item}: {self.GetColumnType(data[item])}, "
        tableMapping = tableMapping[:-2] + ")"
        createMergeTableCommand = f".create-merge table {self.table} " + tableMapping
        self.dataClient.execute(self.database, createMergeTableCommand)

    def GetColumnType(self, item):
        """Returns Kusto data type string equivalent of python object"""
        if isinstance(item, str):
            return "string"
        elif isinstance(item, bool):
            return "bool"
        elif isinstance(item, datetime):
            return "datetime"
        elif isinstance(item, timedelta):
            return "timespan"
        elif isinstance(item, (int, bytes, bytearray)):
            return "int"
        elif isinstance(item, float):
            return "real"
        else:
            return "dynamic"
kcsb = KustoConnectionStringBuilder.with_aad_managed_service_identity_authentication(cluster, client_id=user_assigned_client_id)

# In case you want to authenticate with AAD username and password
username = "******"
password = "******"
kcsb = KustoConnectionStringBuilder.with_aad_user_password_authentication(cluster, username, password, authority_id)

# In case you want to authenticate with AAD device code.
# Please note that if you choose this option, you'll need to authenticate for every new instance that is initialized.
# It is highly recommended to create one instance and use it for all of your queries.
kcsb = KustoConnectionStringBuilder.with_aad_device_authentication(cluster)

# The authentication method will be taken from the chosen KustoConnectionStringBuilder.
client = QueuedIngestClient(kcsb)

# there are more options for authenticating - see azure-kusto-data samples

##################################################################
##                          INGESTION                           ##
##################################################################

# there are a lot of useful properties, make sure to go over docs and check them out
ingestion_props = IngestionProperties(
    database="{database_name}",
    table="{table_name}",
    data_format=DataFormat.CSV,
    # in case status update for success are also required
    # report_level=ReportLevel.FailuresAndSuccesses,
    # in case a mapping is required
def main(): """ Entry point of this script. Uploads data produced by benchmarking scripts to the database. """ args = parse_arguments() # connect to database kcsb_ingest = KustoConnectionStringBuilder.with_az_cli_authentication( CLUSTER_INGEST) ingest_client = QueuedIngestClient(kcsb_ingest) identifier = get_identifier(args.commit_datetime, args.commit_hash, args.trt_version, args.branch) upload_time = datetime.datetime.now(tz=datetime.timezone.utc).replace( microsecond=0) try: result_file = args.report_folder folders = os.listdir(result_file) os.chdir(result_file) tables = [ fail_name, memory_name, latency_name, status_name, latency_over_time_name, specs_name, session_name, ] table_results = {} for table_name in tables: table_results[table_name] = pd.DataFrame() for model_group in folders: os.chdir(model_group) csv_filenames = os.listdir() for csv in csv_filenames: table = pd.read_csv(csv) if session_name in csv: table_results[session_name] = table_results[ session_name].append(get_session(table, model_group), ignore_index=True) elif specs_name in csv: table_results[specs_name] = table_results[ specs_name].append( get_specs(table, args.branch, args.commit_hash, args.commit_datetime), ignore_index=True, ) elif fail_name in csv: table_results[fail_name] = table_results[fail_name].append( get_failures(table, model_group), ignore_index=True) elif latency_name in csv: table_results[memory_name] = table_results[ memory_name].append(get_memory(table, model_group), ignore_index=True) table_results[latency_name] = table_results[ latency_name].append(get_latency(table, model_group), ignore_index=True) if not table_results[latency_name].empty: table_results[latency_over_time_name] = table_results[ latency_over_time_name].append( get_latency_over_time( args.commit_hash, args.report_url, args.branch, table_results[latency_name], ), ignore_index=True, ) elif status_name in csv: table_results[status_name] = table_results[ status_name].append(get_status(table, model_group), ignore_index=True) os.chdir(result_file) for table in tables: print("writing " + table + " to database") db_table_name = "ep_model_" + table write_table( ingest_client, table_results[table], db_table_name, upload_time, identifier, ) except BaseException as e: print(str(e)) sys.exit(1)
def test_peek(self):
    client = QueuedIngestClient("some-cluster")
    fake_peek = fake_peek_factory(
        lambda queue_name, num_messages=1: [
            mock_message(success=True) if "qs" in queue_name else mock_message(success=False) for _ in range(0, num_messages)
        ]
    )
    with mock.patch.object(client._resource_manager, "get_successful_ingestions_queues") as mocked_get_success_qs, mock.patch.object(
        client._resource_manager, "get_failed_ingestions_queues"
    ) as mocked_get_failed_qs, mock.patch.object(QueueClient, "peek_messages", autospec=True, side_effect=fake_peek) as q_mock:
        fake_failed_queue1 = _ResourceUri(
            "mocked_storage_account_f1",
            OBJECT_TYPE,
            "queue",
            "mocked_qf_1_name",
            ENDPOINT_SUFFIX,
        )
        fake_failed_queue2 = _ResourceUri(
            "mocked_storage_account_f2",
            OBJECT_TYPE,
            "queue",
            "mocked_qf_2_name",
            ENDPOINT_SUFFIX,
        )
        fake_success_queue = _ResourceUri(
            "mocked_storage_account2",
            OBJECT_TYPE,
            "queue",
            "mocked_qs_name",
            ENDPOINT_SUFFIX,
        )

        mocked_get_success_qs.return_value = [fake_success_queue]
        mocked_get_failed_qs.return_value = [fake_failed_queue1, fake_failed_queue2]

        qs = KustoIngestStatusQueues(client)

        peek_success_actual = qs.success.peek()
        peek_failure_actual = qs.failure.peek(6)

        assert len(peek_success_actual) == 1

        for m in peek_failure_actual:
            assert isinstance(m, FailureMessage) is True

        for m in peek_success_actual:
            assert isinstance(m, SuccessMessage) is True

        assert len(peek_failure_actual) == 6

        actual = {}

        assert len(QueueClient.peek_messages.call_args_list) == 3

        for call_args in q_mock.call_args_list:
            actual[call_args[0][0].queue_name] = actual.get(call_args[0][0].queue_name, 0) + call_args[1]["max_messages"]

        assert actual[fake_failed_queue2.object_name] == 4
        assert actual[fake_failed_queue1.object_name] == 4
        assert actual[fake_success_queue.object_name] == 2
def test_pop(self):
    client = QueuedIngestClient("some-cluster")
    fake_receive = fake_receive_factory(
        lambda queue_name, num_messages=1: [
            mock_message(success=True) if "qs" in queue_name else mock_message(success=False) for _ in range(0, num_messages)
        ]
    )
    with mock.patch.object(client._resource_manager, "get_successful_ingestions_queues") as mocked_get_success_qs, mock.patch.object(
        client._resource_manager, "get_failed_ingestions_queues"
    ) as mocked_get_failed_qs, mock.patch.object(
        QueueClient,
        "receive_messages",
        autospec=True,
        side_effect=fake_receive,
    ) as q_receive_mock, mock.patch.object(QueueClient, "delete_message", return_value=None) as q_del_mock:
        fake_failed_queue1 = _ResourceUri(
            "mocked_storage_account_f1",
            OBJECT_TYPE,
            "queue",
            "mocked_qf_1_name",
            ENDPOINT_SUFFIX,
        )
        fake_failed_queue2 = _ResourceUri(
            "mocked_storage_account_f2",
            OBJECT_TYPE,
            "queue",
            "mocked_qf_2_name",
            ENDPOINT_SUFFIX,
        )
        fake_success_queue = _ResourceUri(
            "mocked_storage_account2",
            OBJECT_TYPE,
            "queue",
            "mocked_qs_name",
            ENDPOINT_SUFFIX,
        )

        mocked_get_success_qs.return_value = [fake_success_queue]
        mocked_get_failed_qs.return_value = [fake_failed_queue1, fake_failed_queue2]

        qs = KustoIngestStatusQueues(client)

        get_success_actual = qs.success.pop()
        get_failure_actual = qs.failure.pop(6)
        assert len(get_success_actual) == 1
        assert len(get_failure_actual) == 6

        for m in get_failure_actual:
            assert isinstance(m, FailureMessage)

        for m in get_success_actual:
            assert isinstance(m, SuccessMessage)

        assert q_receive_mock.call_count == 3
        assert q_del_mock.call_count == len(get_success_actual) + len(get_failure_actual)

        assert q_receive_mock.call_args_list[0][1]["messages_per_page"] == 2

        actual = {
            q_receive_mock.call_args_list[1][0][0].queue_name: q_receive_mock.call_args_list[1][1]["messages_per_page"],
            q_receive_mock.call_args_list[2][0][0].queue_name: q_receive_mock.call_args_list[2][1]["messages_per_page"],
        }

        assert actual[fake_failed_queue2.object_name] == 4
        assert actual[fake_failed_queue1.object_name] == 4
def test_init(self):
    client = QueuedIngestClient("some-cluster")
    qs = KustoIngestStatusQueues(client)
    assert qs.success.message_cls == SuccessMessage
    assert qs.failure.message_cls == FailureMessage