def create(cls, server, querybuilders, login=False):
    """Build a ``KustoClient`` for ``server`` and attach the query builders.

    Parameters
    ----------
    server :
        Value handed to ``KustoClient``; must not be ``None``.
    querybuilders :
        Passed through to ``cls.__inject_queries`` together with the client.
    login : bool, optional
        When true, a ``.show version`` command is executed against
        ``cls.kustoDatabase`` before returning, which forces the user to
        authenticate up front.

    Returns
    -------
    The client returned by ``cls.__inject_queries``.

    Raises
    ------
    ValueError
        If ``server`` is ``None``.
    """
    if server is None:
        raise ValueError("Failed to create client, no server provided.")

    kusto = cls.__inject_queries(KustoClient(server), querybuilders)
    if not login:
        return kusto

    # force the user to authenticate before returning
    kusto.execute(cls.kustoDatabase, ".show version")
    return kusto
def get_client(cluster):
    """Return the cached client for ``cluster``, creating and caching it on first use.

    A new client is built with AAD device authentication, runs one trivial
    query against the ``VSO`` database and is then stored in ``_client_cache``
    under ``cluster``.
    """
    global _client_cache

    cached = _client_cache.get(cluster)
    if cached is not None:
        return cached

    kcsb = KustoConnectionStringBuilder.with_aad_device_authentication(cluster)
    fresh = KustoClient(kcsb)
    # Zero-row query; presumably issued only to trigger authentication
    # before the client is cached.
    fresh.execute('VSO', 'print "a" | take 0')
    _client_cache[cluster] = fresh
    return fresh
def run_query(self, query, user):
    """Run ``query`` against the configured Kusto database.

    The connection is built from ``self.configuration`` using AAD application
    key authentication. ``user`` is accepted but not used by this method.

    Returns
    -------
    tuple
        ``(json_data, error)``. On success ``json_data`` is the serialized
        ``{"columns": [...], "rows": [...]}`` payload and ``error`` is
        ``None``. On ``KustoServiceError`` or ``KeyboardInterrupt``
        ``json_data`` is ``None`` and ``error`` carries the failure details.
    """
    settings = self.configuration
    kcsb = KustoConnectionStringBuilder.with_aad_application_key_authentication(
        connection_string=settings["cluster"],
        aad_app_id=settings["azure_ad_client_id"],
        app_key=settings["azure_ad_client_secret"],
        authority_id=settings["azure_ad_tenant_id"],
    )
    client = KustoClient(kcsb)
    db = settings["database"]

    try:
        response = client.execute(db, query)
        table = response.primary_results[0]
        result_cols = table.columns
        result_rows = table.rows

        columns = [
            {
                "name": col.column_name,
                "friendly_name": col.column_name,
                "type": TYPES_MAP.get(col.column_type, None),
            }
            for col in result_cols
        ]
        # rows must be [{'column1': value, 'column2': value}]
        rows = [record.to_dict() for record in result_rows]

        error = None
        json_data = json_dumps({"columns": columns, "rows": rows})
    except KustoServiceError as err:
        json_data = None
        details = err.args[1]
        try:
            # Prefer the service-provided message when the payload has it.
            error = details[0]["error"]["@message"]
        except (IndexError, KeyError):
            error = details
    except KeyboardInterrupt:
        json_data = None
        error = "Query cancelled by user."

    return json_data, error
def kusto_output():
    """Run the SOC alerts query against Kusto and dump each result table to a file.

    Steps:

    1. Build an AAD application-key connection string from the
       ``SOCAlertConsumer`` settings.
    2. Read the query text from ``.//KustoQuery//SOCAlerts.csl``.
    3. Execute it against ``SOCAlertConsumer.db``.
    4. Write ``str(table)`` for every entry of ``response.primary_results`` to
       ``.//output//alerts-<timestamp>.json``.

    A ``RuntimeError`` while building the connection string or executing the
    query is logged to the audit logger and the function returns without
    writing anything (previously execution continued and failed with a
    ``NameError`` on the unbound ``kcsb`` / ``response``).

    Returns
    -------
    None
    """
    audit = logger.AppLogging.auditlogger

    try:
        kcsb = KustoConnectionStringBuilder.with_aad_application_key_authentication(
            SOCAlertConsumer.cluster,
            SOCAlertConsumer.client_id,
            SOCAlertConsumer.client_secret,
            SOCAlertConsumer.authority_id)
    except RuntimeError as ie:
        # Spaces added around the interpolated values so the message is readable.
        audit.error(
            "Unable to import Kusto Connection Strings. "
            "Please check your configuration " + SOCAlertConsumer.cluster +
            " with AppID " + SOCAlertConsumer.client_id)
        audit.error(str(ie))
        return
    audit.info(
        "Successfully loaded Kusto Connection Strings" + " " +
        SOCAlertConsumer.cluster + " with AppID " + SOCAlertConsumer.client_id)

    client = KustoClient(kcsb)

    # forward slashes work on both Windows and Linux
    with open(".//KustoQuery//SOCAlerts.csl", "r") as kusto_query:
        query = kusto_query.read()

    try:
        response = client.execute(SOCAlertConsumer.db, query)
    except RuntimeError as r:
        audit.error("There was an error in receiving response from Kusto")
        audit.error(str(r))
        return
    audit.info("Successfully received response from Kusto")

    for table in response.primary_results:
        filetime = time.strftime("%Y%m%d-%H%M%S")
        # NOTE(review): mode "x" refuses to overwrite and the name only has
        # second resolution, so two tables written within the same second
        # raise FileExistsError - confirm whether more than one primary
        # result is ever expected here.
        with open(".//output//alerts-" + filetime + ".json", "x") as out_file:
            out_file.write(str(table))

    # Logged only once the files have actually been written.
    audit.info("Query output is saved to output folder")
cslDataType="timespan")) mappings.append( JsonColumnMapping(columnName="xtextWithNulls", jsonPath="$.xtextWithNulls", cslDataType="string")) mappings.append( JsonColumnMapping(columnName="xdynamicWithNulls", jsonPath="$.xdynamicWithNulls", cslDataType="dynamic")) return mappings client = KustoClient("https://toshetah.kusto.windows.net") ingest_client = KustoIngestClient("https://ingest-toshetah.kusto.windows.net") ingest_status_q = KustoIngestStatusQueues(ingest_client) client.execute("PythonTest", ".drop table Deft ifexists") @pytest.mark.run(order=1) def test_csv_ingest_non_existing_table(): csv_ingest_props = IngestionProperties( "PythonTest", "Deft", dataFormat=DataFormat.csv, mapping=Helpers.create_deft_table_csv_mappings(), reportLevel=ReportLevel.FailuresAndSuccesses, ) csv_file_path = os.path.join(os.getcwd(), "azure-kusto-ingest", "tests", "input", "dataset.csv") zipped_csv_file_path = os.path.join(os.getcwd(), "azure-kusto-ingest", "tests", "input", "dataset.csv.gz")
cluster, client_id, PEM, thumbprint, authority_id) client = KustoClient(kcsb) # In case you want to authenticate with the logged in AAD user. client = KustoClient(cluster) ###################################################### ## QUERY ## ###################################################### # once authenticated, usage is as following db = "Samples" query = "StormEvents | take 10" response = client.execute(db, query) # iterating over rows is possible for row in response.primary_results[0]: # printing specific columns by index print("value at 0 {}".format(row[0])) print("\n") # printing specific columns by name print("EventType:{}".format(row["EventType"])) # tables are serializeable, so: with open("results.json", "w+") as f: f.write(str(response.primary_results[0])) # we also support dataframes: dataframe = dataframe_from_result_table(response.primary_results[0])
return mappings cluster = "Dadubovs1.westus" # "toshetah" db_name = "TestingDatabase" # "PythonTest" table_name = "Deft" engine_kcsb = KustoConnectionStringBuilder.with_aad_device_authentication( "https://{}.kusto.windows.net".format(cluster)) dm_kcsb = KustoConnectionStringBuilder.with_aad_device_authentication( "https://ingest-{}.kusto.windows.net".format(cluster)) client = KustoClient(engine_kcsb) ingest_client = KustoIngestClient(dm_kcsb) ingest_status_q = KustoIngestStatusQueues(ingest_client) client.execute(db_name, ".drop table {} ifexists".format(table_name)) @pytest.mark.run(order=1) def test_csv_ingest_non_existing_table(): csv_ingest_props = IngestionProperties( db_name, table_name, dataFormat=DataFormat.csv, mapping=Helpers.create_deft_table_csv_mappings(), reportLevel=ReportLevel.FailuresAndSuccesses, ) csv_file_path = os.path.join(os.getcwd(), "azure-kusto-ingest", "tests", "input", "dataset.csv") zipped_csv_file_path = os.path.join(os.getcwd(), "azure-kusto-ingest", "tests", "input", "dataset.csv.gz")
class Kusto_Client(object):
    """Wrapper around ``KustoClient`` configured from a connection key/value mapping.

    The constructor selects an AAD authentication flow from the keys present
    in the mapping, replaces the client's AAD helper with ``_MyAadHelper``
    and records which REST endpoint version the underlying client uses.
    ``execute`` returns results wrapped in ``KqlQueryResponse``.
    """

    _DEFAULT_CLIENTID = "db662dc1-0cfe-4e1c-a843-19a68e65be58"  # kusto client app, don't know app name
    # _DEFAULT_CLIENTID = "8430759c-5626-4577-b151-d0755f5355d8"  # kusto client app, don't know app name

    def __init__(self, conn_kv):
        """Create the underlying client from ``conn_kv``.

        Parameters
        ----------
        conn_kv : mapping
            Connection values keyed by ``ConnStrKeys`` members.
            ``ConnStrKeys.CLUSTER`` is required and is expanded to
            ``https://<cluster>.kusto.windows.net``. The authentication flow
            is chosen in this order:

            1. ``USERNAME`` + ``PASSWORD``: AAD user/password.
            2. ``CLIENTID`` + ``CLIENTSECRET``: AAD application key.
            3. ``CLIENTID`` + ``CERTIFICATE`` + ``CERTIFICATE_THUMBPRINT``:
               AAD application certificate.
            4. otherwise: AAD device authentication.

            ``TENANT`` is optional. It is passed to the application flows and,
            when not ``None``, set as ``authority_id`` on the user/password
            and device flows.
        """
        cluster_url = "https://{0}.kusto.windows.net".format(
            conn_kv[ConnStrKeys.CLUSTER])

        username = conn_kv.get(ConnStrKeys.USERNAME)
        password = conn_kv.get(ConnStrKeys.PASSWORD)
        app_id = conn_kv.get(ConnStrKeys.CLIENTID)
        app_secret = conn_kv.get(ConnStrKeys.CLIENTSECRET)
        cert = conn_kv.get(ConnStrKeys.CERTIFICATE)
        cert_thumbprint = conn_kv.get(ConnStrKeys.CERTIFICATE_THUMBPRINT)
        tenant = conn_kv.get(ConnStrKeys.TENANT)

        if username and password:
            kcsb = KustoConnectionStringBuilder.with_aad_user_password_authentication(
                cluster_url, username, password)
            if tenant is not None:
                kcsb.authority_id = tenant
        elif app_id and app_secret:
            kcsb = KustoConnectionStringBuilder.with_aad_application_key_authentication(
                cluster_url, app_id, app_secret, tenant)
        elif app_id and cert and cert_thumbprint:
            kcsb = KustoConnectionStringBuilder.with_aad_application_certificate_authentication(
                cluster_url, app_id, cert, cert_thumbprint, tenant)
        else:
            kcsb = KustoConnectionStringBuilder.with_aad_device_authentication(
                cluster_url)
            if tenant is not None:
                kcsb.authority_id = tenant

        self.client = KustoClient(kcsb)

        # replace aadhelper to use remote browser in interactive mode
        self.client._aad_helper = _MyAadHelper(kcsb, self._DEFAULT_CLIENTID)

        self.mgmt_endpoint_version = self._endpoint_version(
            self.client._mgmt_endpoint)
        self.query_endpoint_version = self._endpoint_version(
            self.client._query_endpoint)

    @staticmethod
    def _endpoint_version(endpoint):
        """Return ``"v2"`` for endpoints ending in ``v2/rest/query``, else ``"v1"``."""
        if endpoint.endswith("v2/rest/query"):
            return "v2"
        return "v1"

    def execute(self,
                kusto_database,
                query,
                accept_partial_results=False,
                timeout=None):
        """Execute a query or management command.

        Parameters
        ----------
        kusto_database : str
            Database the query is executed against.
        query : str
            Query text. Text starting with ``.`` is treated as a management
            command when choosing the endpoint version.
        accept_partial_results : bool, optional
            Forwarded to the underlying client. Default is False.
        timeout : float, optional
            Forwarded to the underlying client. Default is None.

        Returns
        -------
        KqlQueryResponse
            Wraps the raw response together with the endpoint version.
        """
        if query.startswith("."):
            endpoint_version = self.mgmt_endpoint_version
        else:
            endpoint_version = self.query_endpoint_version

        # The final positional argument asks the client for the raw response.
        raw_response = self.client.execute(
            kusto_database, query, accept_partial_results, timeout, True)
        return KqlQueryResponse(raw_response, endpoint_version)
# Please note that if you choose this option, you'll need to authenticate for every new instance that is initialized. # It is highly recommended to create one instance and use it for all of your queries. kcsb = KustoConnectionStringBuilder.with_aad_device_authentication(cluster) # The authentication method will be taken from the chosen KustoConnectionStringBuilder. client = KustoClient(kcsb) ###################################################### ## QUERY ## ###################################################### # once authenticated, usage is as following db = "Samples" query = "StormEvents | take 10" response = client.execute(db, query) # iterating over rows is possible for row in response.primary_results[0]: # printing specific columns by index print("value at 0 {}".format(row[0])) print("\n") # printing specific columns by name print("EventType:{}".format(row["EventType"])) # tables are serializeable, so: with open("results.json", "w+") as f: f.write(str(response.primary_results[0])) # we also support dataframes: dataframe = dataframe_from_result_table(response.primary_results[0])
jsonPath="$.xtextWithNulls", cslDataType="string")) mappings.append( JsonColumnMapping( columnName="xdynamicWithNulls", jsonPath="$.xdynamicWithNulls", cslDataType="dynamic", )) return mappings KUSTO_CLIENT = KustoClient("https://toshetah.kusto.windows.net") KUSTO_INGEST_CLIENT = KustoIngestClient( "https://ingest-toshetah.kusto.windows.net") KUSTO_CLIENT.execute("PythonTest", ".drop table Deft ifexists") # Sanity test - ingest from csv to a non-existing table CSV_INGESTION_PROPERTIES = IngestionProperties( "PythonTest", "Deft", dataFormat=DataFormat.csv, mapping=Helpers.create_deft_table_csv_mappings(), ) CSV_FILE_PATH = os.path.join(os.getcwd(), "azure-kusto-ingest", "tests", "input", "dataset.csv") ZIPPED_CSV_FILE_PATH = os.path.join(os.getcwd(), "azure-kusto-ingest", "tests", "input", "dataset.csv.gz") KUSTO_INGEST_CLIENT.ingest_from_multiple_files( [CSV_FILE_PATH, ZIPPED_CSV_FILE_PATH], False, CSV_INGESTION_PROPERTIES)
with open(FILENAME, "r") as pem_file: PEM = pem_file.read() THUMBPRINT = "certificate's thumbprint" KCSB = KustoConnectionStringBuilder.with_aad_application_certificate_authentication( KUSTO_CLUSTER, CLIENT_ID, PEM, THUMBPRINT) KUSTO_CLIENT = KustoClient(KCSB) # In case you want to authenticate with the logged in AAD user. KUSTO_CLIENT = KustoClient(KUSTO_CLUSTER) KUSTO_DATABASE = "Samples" KUSTO_QUERY = "StormEvents | take 10" RESPONSE = KUSTO_CLIENT.execute(KUSTO_DATABASE, KUSTO_QUERY) for row in RESPONSE.primary_results[0]: print(row[0], " ", row["EventType"]) # Query is too big to be executed KUSTO_QUERY = "StormEvents" try: RESPONSE = KUSTO_CLIENT.execute(KUSTO_DATABASE, KUSTO_QUERY) except KustoServiceError as error: print("2. Error:", error) print("2. Is semantic error:", error.is_semantic_error()) print("2. Has partial results:", error.has_partial_results()) print("2. Result size:", len(error.get_partial_results())) RESPONSE = KUSTO_CLIENT.execute(KUSTO_DATABASE, KUSTO_QUERY,
TIMESTAMP, NodeName, redo_queue_size_mb = redo_queue_size /1024., redo_rate, failover_time_sec = redo_queue_size*1.0/redo_rate | order by TIMESTAMP asc | summarize redo_queue_list = make_list(redo_queue_size_mb), time_series = make_list(TIMESTAMP), failover_time_sec_list = make_list(failover_time_sec) by NodeName | extend redo_diff_mb= series_fir(redo_queue_list, dynamic([1,-1]), false, false) | extend redo_stats = series_stats_dynamic(redo_diff_mb) | extend failover_time_smooth_secs = series_fir(failover_time_sec_list, dynamic ([1,1,1]), true, true) //normalize= true, center= true | extend failover_time_stats = series_stats_dynamic(failover_time_smooth_secs) | project redo_stats.avg, failover_time_stats.max''' try: print("querying") failover_response = kusto_client.execute("sqlazure1", query) print("response") print(failover_response) failover_df = dataframe_from_result_table( failover_response.primary_results[0]) print("dataframe") print(failover_df) print(failover_df['redo_stats_avg']) redo_stats_avg = failover_df['redo_stats_avg'].mean() failover_time_stats_max = failover_df['failover_time_stats_max'].max() a = (redo_stats_avg, failover_time_stats_max) except:
class Kusto_Client(object):
    """Wrapper around ``KustoClient`` that picks an AAD flow from its arguments.

    The constructor builds a connection string from whichever credentials are
    supplied, replaces the client's AAD helper with ``_MyAadHelper`` and
    records which REST endpoint version the underlying client uses.
    ``execute`` returns results wrapped in ``KqlResponse``.
    """

    def __init__(
            self,
            kusto_cluster,
            client_id=None,
            client_secret=None,
            username=None,
            password=None,
            certificate=None,
            certificate_thumbprint=None,
            authority=None,
    ):
        """Create the underlying client.

        Parameters
        ----------
        kusto_cluster : str
            Kusto cluster endpoint. Example: https://help.kusto.windows.net
        client_id : str, optional
            AAD application ID; used by the application key and certificate flows.
        client_secret : str, optional
            AAD application key; with ``client_id`` selects the application key flow.
        username : str, optional
            User name; with ``password`` selects the user/password flow,
            which takes precedence over every other flow.
        password : str, optional
            Password matching ``username``.
        certificate : optional
            Certificate; with ``client_id`` and ``certificate_thumbprint``
            selects the application certificate flow.
        certificate_thumbprint : optional
            Thumbprint of ``certificate``.
        authority : optional
            When truthy, set as ``authority_id`` on the connection string.

        With none of the credential combinations present, AAD device
        authentication is used.
        """
        builder = KustoConnectionStringBuilder
        if username and password:
            kcsb = builder.with_aad_user_password_authentication(
                kusto_cluster, username, password)
        elif client_id and client_secret:
            kcsb = builder.with_aad_application_key_authentication(
                kusto_cluster, client_id, client_secret)
        elif client_id and certificate and certificate_thumbprint:
            kcsb = builder.with_aad_application_certificate_authentication(
                kusto_cluster, client_id, certificate, certificate_thumbprint)
        else:
            kcsb = builder.with_aad_device_authentication(kusto_cluster)

        if authority:
            kcsb.authority_id = authority

        self.client = KustoClient(kcsb)

        # replace aadhelper to use remote browser in interactive mode
        self.client._aad_helper = _MyAadHelper(kcsb)

        v2_suffix = "v2/rest/query"
        if self.client._mgmt_endpoint.endswith(v2_suffix):
            self.mgmt_endpoint_version = "v2"
        else:
            self.mgmt_endpoint_version = "v1"
        if self.client._query_endpoint.endswith(v2_suffix):
            self.query_endpoint_version = "v2"
        else:
            self.query_endpoint_version = "v1"

    def execute(self,
                kusto_database,
                query,
                accept_partial_results=False,
                timeout=None):
        """Execute a query or management command.

        Parameters
        ----------
        kusto_database : str
            Database the query is executed against.
        query : str
            Query text. Text starting with ``.`` is treated as a management
            command when choosing the endpoint version.
        accept_partial_results : bool, optional
            Forwarded to the underlying client. Default is False.
        timeout : float, optional
            Forwarded to the underlying client. Default is None.

        Returns
        -------
        KqlResponse
            Wraps the raw response together with the endpoint version.
        """
        is_management_command = query.startswith(".")
        if is_management_command:
            endpoint_version = self.mgmt_endpoint_version
        else:
            endpoint_version = self.query_endpoint_version

        # The final positional argument asks the client for the raw response.
        raw_response = self.client.execute(
            kusto_database, query, accept_partial_results, timeout, True)
        return KqlResponse(raw_response, endpoint_version)
class CaseTitlesFetcher:
    """Fetch support-case titles from Kusto, cleanse them and store them as sample utterances.

    ``fetchCaseTitles`` is the entry point: it queries Kusto, filters and
    normalizes the incident titles, optionally reduces each category to key
    sentences (``extractor``) and merges the result into
    ``SampleUtterances.json`` under ``datapath`` (``runCaseTitlesExtraction``).
    """

    def __init__(self, trainingConfig, trainingId):
        """Create the Kusto client and load the phrase lists from ``metadata/``.

        Parameters
        ----------
        trainingConfig :
            Configuration object; this class reads ``runExtractionEnabled``,
            ``extractionRatio``, ``downloadCaseTitlesEnabled`` and
            ``caseTitlesDaysSince`` from it.
        trainingId :
            Identifier used to name the log file (``<trainingId>.log``).
        """
        self.trainingId = trainingId
        cluster = "https://usage360.kusto.windows.net"
        authority_id = "72f988bf-86f1-41af-91ab-2d7cd011db47"
        client_id = kustoClientId
        client_secret = kustoClientSecret
        kcsb = KustoConnectionStringBuilder.with_aad_application_key_authentication(cluster, client_id, client_secret, authority_id)
        self.kustoClient = KustoClient(kcsb)
        self.garbageList = self._readStrippedLines("metadata/garbagePhrases.txt")
        self.striptrailers = self._readStrippedLines("metadata/stripTrailers.txt")
        self.shortPhrases = self._readStrippedLines("metadata/shortPhrasesList.txt")
        self.trainingConfig = trainingConfig

    @staticmethod
    def _readStrippedLines(path):
        """Return every line of ``path`` with surrounding whitespace removed."""
        with open(path, "r") as handle:
            return [line.strip() for line in handle]

    @staticmethod
    def _asLinkList(links):
        """Return ``links`` as a list, wrapping a bare string into a one-item list."""
        if isinstance(links, str):
            return [links]
        return list(links)

    def _log(self, message):
        """Append ``message`` to this training run's log file."""
        loggerInstance.logToFile("{0}.log".format(self.trainingId), message)

    def endSentence(self, sent):
        """Return ``sent`` with a trailing period added if it has none (empty input gives ".")."""
        if sent.endswith("."):
            return sent
        return sent + "."

    def squeeze(self, sent):
        """Return ``sent`` without trailing periods and without any spaces."""
        # rstrip also copes with empty or all-period input, which the former
        # character-by-character loop indexed past.
        return sent.rstrip(".").replace(" ", "")

    def isEnglish(self, s):
        """Return True when ``str(s)`` contains only ASCII characters."""
        s = str(s)
        try:
            s.encode(encoding='utf-8').decode('ascii')
        except UnicodeDecodeError:
            return False
        else:
            return True

    def stripTrails(self, s):
        """Lower-case ``s`` and repeatedly remove any configured trailer from its end."""
        s = str(s).lower().strip()
        for tr in self.striptrailers:
            # An empty trailer (blank line in the trailer file) matches every
            # string and s[:-0] would wipe the whole title, so skip it.
            if tr and s.endswith(tr.lower()):
                return self.stripTrails(s[:-len(tr)])
        return s.strip()

    def pipeCleansing(self, s):
        """Strip trailers from ``s`` and keep only the text after the last ``|``."""
        s = self.stripTrails(s)
        return s.split("|")[-1].lstrip()

    def extractor(self, key, group):
        """Turn one category's rows into ``{"text", "links", "category"}`` entries.

        Parameters
        ----------
        key : sequence
            The two group keys; joined as ``key[0] + "--" + key[1]`` to form
            the category name.
        group : pandas.DataFrame
            Rows of the category; ``CleanCaseTitles`` and
            ``SupportCenterCaseLink`` are read from each row.

        Returns
        -------
        list of dict
            When extraction is enabled and the group has more than 10 rows,
            one entry per key sentence returned by ``retrieveSentences`` that
            can be matched back to at least one case title. Otherwise one
            entry per row. ``links`` is always a list.
        """
        category = key[0]+"--"+key[1]
        lines = [(self.endSentence(row["CleanCaseTitles"]), row["SupportCenterCaseLink"]) for ind, row in group.iterrows()]

        if not (self.trainingConfig.runExtractionEnabled and len(lines)>10):
            self._log("Extractor: Disabled or not enough lines for summarization")
            # links is wrapped in a list so that the later merge
            # (list concatenation + set) does not split the URL into characters.
            return [{"text": text, "links": [link], "category": category} for text, link in lines]

        numsentences = group.shape[0]
        self._log("Extractor: Running extractor on category " + category + " containing " + str(numsentences) + " case titles")
        doc = " ".join([text for text, _ in lines])
        keysentences = retrieveSentences(doc, max([10, int(numsentences*self.trainingConfig.extractionRatio)])*10)
        self._log("Extractor: Extracted " + str(len(keysentences)) + " sentences.")

        # Squeezed titles are loop-invariant; compute them once.
        squeezedTitles = [self.squeeze(text) for text, _ in lines]
        resultTitles = []
        for sent in keysentences:
            target = self.squeeze(sent)
            if not target:
                # Nothing left to match on; a substring test would match every title.
                continue
            # 1) exact match, 2) sentence contained in a title, 3) match on alphanumerics only
            caselinks = [link for (_, link), squeezed in zip(lines, squeezedTitles) if squeezed==target]
            if not caselinks:
                caselinks = [link for (_, link), squeezed in zip(lines, squeezedTitles) if target in squeezed]
            if not caselinks:
                alnumSent = re.sub('[^0-9a-zA-Z]+', '', sent)
                caselinks = [link for text, link in lines if alnumSent==re.sub('[^0-9a-zA-Z]+', '', text)]
            if caselinks:
                resultTitles.append({"text": sent, "links": caselinks, "category": category})
        return resultTitles

    def _mergeIncidentTitles(self, existing, results):
        """Merge ``results`` into ``existing`` in place, combining links of equal texts."""
        for new in results:
            found = False
            for old in existing:
                # NOTE(review): the early break assumes ``existing`` is sorted
                # by text; entries appended below are not re-sorted - confirm
                # whether duplicates after an append are acceptable.
                if new["text"]<old["text"]:
                    break
                elif new["text"]==old["text"]:
                    # Normalizing also covers entries stored with a bare-string link.
                    old["links"] = list(set(self._asLinkList(old["links"]) + self._asLinkList(new["links"])))
                    found = True
                    break
            if not found:
                existing.append(new)

    def runCaseTitlesExtraction(self, df, productid, datapath):
        """Extract titles per category from ``df`` and merge them into ``SampleUtterances.json``.

        Parameters
        ----------
        df : pandas.DataFrame or None
            Cleansed incidents. Ignored when downloading is disabled; ``None``
            or an empty frame yields no new titles.
        productid :
            Not used by this method.
        datapath : str
            Directory containing (or receiving) ``SampleUtterances.json``.
        """
        utterancesPath = os.path.join(datapath, "SampleUtterances.json")

        # ``df.any`` was previously referenced without being called and was
        # therefore always truthy; test for actual content instead.
        if self.trainingConfig.downloadCaseTitlesEnabled and df is not None and not df.empty:
            df["Incidents_SupportTopicL2Current"]=df["Incidents_SupportTopicL2Current"].fillna("NOSELECTION")
            df["Incidents_SupportTopicL3Current"]=df["Incidents_SupportTopicL3Current"].fillna("NOSELECTION")
            groups = list(df.groupby(["Incidents_SupportTopicL2Current", "Incidents_SupportTopicL3Current"]))
            self._log("RunCaseTitlesExtraction: Processing " + str(df.shape[0]) + " case titles across " + str(len(groups)) + " categories")
            extracted = itertools.chain.from_iterable(self.extractor(key, group) for key, group in groups)
            results = sorted(extracted, key=lambda x: x["text"])
        else:
            results = []

        try:
            with open(utterancesPath, "r") as handle:
                sampleUtterances = json.loads(handle.read())
        except FileNotFoundError:
            self._log("[ERROR]RunCaseTitlesExtraction: File SampleUtterances.json does not exist, creating new file.")
            with open(utterancesPath, "w") as handle:
                handle.write(json.dumps({"incidenttitles" : results, "stackoverflowtitles": []}, indent=4))
            return

        self._mergeIncidentTitles(sampleUtterances["incidenttitles"], results)
        with open(utterancesPath, "w") as handle:
            handle.write(json.dumps(sampleUtterances, indent=4))
        self._log("RunCaseTitlesExtraction: Successfully written extracted case titles to file SampleUtterances.json")

    def _cleanseTitles(self, df):
        """Filter and normalize fetched incidents; returns a frame with ``CleanCaseTitles``."""
        cleansedCol = "Incidents_Title_PipeCleansed"
        self._log("DataCleansing: " + str(df.shape[0]) + " incidents fetched")

        #Remove all non english cases
        englishMask = df["Incidents_Title"].map(self.isEnglish).astype(bool)
        df_eng = df[englishMask]
        self._log("DataCleansing: " + str(df.shape[0] - df_eng.shape[0]) + " non English language cases removed")

        #Remove all cases with character length 3 or less
        # .copy() so that the column assignment below acts on an independent frame.
        titled = df_eng[df_eng["Incidents_Title"].str.len()>3].copy()

        #Extract case title from piped sentences, then remove any content in square brackets
        cleansed = titled["Incidents_Title"].map(self.pipeCleansing)
        titled[cleansedCol] = cleansed.map(lambda x: re.sub("[\\[].*?[\\]]", "", x))

        #Remove any remaining titles with character length 3 or less
        titled = titled[titled[cleansedCol].str.len()>3]

        #Remove any garbage phrases (defined in garbage list)
        df_eng_clean = titled[~titled[cleansedCol].isin(self.garbageList)]
        self._log("DataCleansing: " + str(df_eng.shape[0] - df_eng_clean.shape[0]) + " garbage case title incidents removed")

        #Drop titles with fewer than two words longer than two characters,
        #except for the configured short phrases
        wordcount = df_eng_clean[cleansedCol].map(lambda x: len([a for a in x.split() if len(a)>2]))
        keep = df_eng_clean[cleansedCol].isin(self.shortPhrases) | (wordcount>=2)
        df_eng_clean = df_eng_clean[keep].rename(columns={cleansedCol: "CleanCaseTitles"})

        self._log("DataCleansing: " + str(df_eng_clean.shape[0]) + " incidents will be processed for summarization")
        return df_eng_clean

    def fetchCaseTitles(self, productid, datapath):
        """Download, cleanse and store case titles for ``productid``.

        Parameters
        ----------
        productid :
            Value interpolated into the Kusto query's ``DerivedProductIDStr`` filter.
        datapath : str
            Directory containing (or receiving) ``SampleUtterances.json``.

        Raises
        ------
        TrainingException
            Wrapping any failure, prefixed with the failing stage
            (``KustoFetcher``, ``DataCleansing`` or ``CaseTitleExtraction``).
        """
        if self.trainingConfig.downloadCaseTitlesEnabled:
            ndays = int(self.trainingConfig.caseTitlesDaysSince)
            try:
                db = "Product360"
                # NOTE(review): productid is formatted straight into the query
                # text; confirm it never comes from untrusted input.
                query = (
                    "cluster('usage360').database('Product360'). "
                    "AllCloudSupportIncidentDataWithP360MetadataMapping "
                    "| where DerivedProductIDStr in ('{0}') "
                    "| where Incidents_CreatedTime >= ago({1}d) "
                    "| summarize IncidentTime = any(Incidents_CreatedTime) by Incidents_IncidentId , "
                    "Incidents_Severity , Incidents_ProductName , Incidents_SupportTopicL2Current , "
                    "Incidents_SupportTopicL3Current, Incidents_Title "
                    "| extend SupportCenterCaseLink = "
                    "strcat('https://azuresupportcenter.msftcloudes.com/caseoverview?srId=', Incidents_IncidentId) "
                    "| order by Incidents_SupportTopicL3Current asc"
                ).format(productid, ndays)
                response = self.kustoClient.execute(db, query)
            except Exception as e:
                raise TrainingException("KustoFetcher: " + str(e)) from e

            try:
                df = dataframe_from_result_table(response.primary_results[0])
                df_eng_clean = self._cleanseTitles(df)
            except Exception as e:
                raise TrainingException("DataCleansing: " + str(e)) from e
        else:
            self._log("CaseTitleExtraction: Disabled")
            df_eng_clean = None

        try:
            self.runCaseTitlesExtraction(df_eng_clean, productid, datapath)
        except Exception as e:
            raise TrainingException("CaseTitleExtraction: " + str(e)) from e
import pandas
from azure.kusto.data.exceptions import KustoServiceError
from azure.kusto.data.helpers import dataframe_from_result_table
from azure.kusto.data.request import KustoClient, KustoConnectionStringBuilder, ClientRequestProperties

# Export script: runs the query stored in kustoicmquery.txt against the
# IcmDataWarehouse database and writes the first primary result table to
# ./outputs/ICM.csv.
use_cache = False
if use_cache:
    print('using cache')
else:
    cluster = "https://icmcluster.kusto.windows.net"
    output_path = './outputs'
    os.makedirs(output_path, exist_ok=True)

    # It is highly recommended to create one instance and use it for all of your queries.
    kcsb = KustoConnectionStringBuilder.with_aad_device_authentication(cluster)

    # The authentication method will be taken from the chosen KustoConnectionStringBuilder.
    client = KustoClient(kcsb)

    db = "IcmDataWarehouse"
    # Read the query through a context manager so the file handle is closed
    # right away instead of being left to the garbage collector.
    with open("kustoicmquery.txt", "r") as query_file:
        query = query_file.read()
    file_name = os.path.join(output_path, 'ICM.csv')

    response = client.execute(db, query)

    # we also support dataframes:
    df_incidents = dataframe_from_result_table(response.primary_results[0])
    df_incidents.to_csv(file_name)
from azure.kusto.data.request import KustoClient, KustoConnectionStringBuilder import pandas as pd LIMIT_BENCHMARK_RUNS = 30 EXPECTED_DURATION_OF_SLEEPING_IN_MINUTES = 10 cluster_name = "https://sqlazureweu2.kustomfa.windows.net" kustoStringBuilder = KustoConnectionStringBuilder.with_aad_device_authentication( cluster_name) kusto_client = KustoClient(kustoStringBuilder) connection = CLperfDB.connect() if __name__ == "__main__": #make kusto client fakely fake_response = kusto_client.execute("sqlazure1", "MonBackup|take 1") number_of_scheduled_benchmarks = 0 instances_json = open('Configurations/instances.json', 'r+').read() instances = json.loads(instances_json) #for all jobs successful_runs = [] results = [] failed_runs = [] instance_occupation = {} for instance_info in instances: instance_occupation[instance_info['instance_name']] = -1 pending_runs = []