예제 #1
0
    def create(cls, server, querybuilders, login=False):
        """Build a Kusto client for ``server`` and attach the given query builders.

        Parameters
        ----------
        server
            Value handed to ``KustoClient``; must not be ``None``.
        querybuilders
            Passed through to ``cls.__inject_queries`` together with the client.
        login : bool, optional
            When true, a ``.show version`` command is executed against
            ``cls.kustoDatabase`` before the client is returned.

        Raises
        ------
        ValueError
            If ``server`` is ``None``.
        """
        if server is None:
            raise ValueError("Failed to create client, no server provided.")

        client = cls.__inject_queries(KustoClient(server), querybuilders)

        if not login:
            return client

        # Running a command up front makes the user authenticate before the
        # client is handed back.
        client.execute(cls.kustoDatabase, ".show version")
        return client
예제 #2
0
def get_client(cluster):
    """
    Return the cached client for ``cluster``, creating and caching one on a miss.

    A new client is built with AAD device authentication and immediately runs
    a trivial query before it is stored in ``_client_cache``.
    """
    cached = _client_cache.get(cluster)
    if cached is not None:
        return cached

    kcsb = KustoConnectionStringBuilder.with_aad_device_authentication(cluster)
    client = KustoClient(kcsb)
    # Run a query that returns no rows before caching the client
    # (presumably to trigger the device login up front - confirm).
    client.execute('VSO', 'print "a" | take 0')
    _client_cache[cluster] = client
    return client
예제 #3
0
    def run_query(self, query, user):
        """Execute ``query`` against the configured Kusto database.

        The connection is built from ``self.configuration`` (cluster, AAD
        application id/secret and tenant id). ``user`` is accepted but not used.

        Returns
        -------
        tuple
            ``(json_data, error)``. On success ``json_data`` is the serialized
            ``{"columns": [...], "rows": [...]}`` payload and ``error`` is
            ``None``; on a ``KustoServiceError`` or ``KeyboardInterrupt``
            ``json_data`` is ``None`` and ``error`` describes the failure.
        """
        config = self.configuration
        kcsb = KustoConnectionStringBuilder.with_aad_application_key_authentication(
            connection_string=config["cluster"],
            aad_app_id=config["azure_ad_client_id"],
            app_key=config["azure_ad_client_secret"],
            authority_id=config["azure_ad_tenant_id"],
        )
        client = KustoClient(kcsb)
        db = config["database"]

        json_data = None
        error = None
        try:
            table = client.execute(db, query).primary_results[0]

            columns = [
                {
                    "name": col.column_name,
                    "friendly_name": col.column_name,
                    "type": TYPES_MAP.get(col.column_type, None),
                }
                for col in table.columns
            ]
            # rows must be [{'column1': value, 'column2': value}]
            rows = [record.to_dict() for record in table.rows]

            json_data = json_dumps({"columns": columns, "rows": rows})
        except KustoServiceError as err:
            # Prefer the service's own message; fall back to the raw payload.
            try:
                error = err.args[1][0]["error"]["@message"]
            except (IndexError, KeyError):
                error = err.args[1]
        except KeyboardInterrupt:
            error = "Query cancelled by user."

        return json_data, error
예제 #4
0
    def kusto_output():
        """Run the SOC alerts query and write each primary result table to a file.

        The connection is built from the ``SOCAlertConsumer`` settings, the
        query text is read from ``.//KustoQuery//SOCAlerts.csl`` and every
        table in ``response.primary_results`` is written to
        ``.//output//alerts-<timestamp>.json``.

        If building the connection string or executing the query raises
        ``RuntimeError``, the error is logged and the function returns without
        writing anything (previously execution continued and failed on an
        unbound local variable).
        """
        try:
            kcsb = KustoConnectionStringBuilder.with_aad_application_key_authentication(
                SOCAlertConsumer.cluster, SOCAlertConsumer.client_id,
                SOCAlertConsumer.client_secret, SOCAlertConsumer.authority_id)
        except RuntimeError as ie:
            logger.AppLogging.auditlogger.error(
                "Unable to import Kusto Connection Strings. "
                "Please check your configuration" + SOCAlertConsumer.cluster +
                "with AppID" + SOCAlertConsumer.client_id)
            logger.AppLogging.auditlogger.error(str(ie))
            # Without a connection string there is nothing left to do.
            return
        else:
            logger.AppLogging.auditlogger.info(
                "Successfully loaded Kusto Connection Strings" + " " +
                SOCAlertConsumer.cluster + " with AppID " +
                SOCAlertConsumer.client_id)

        client = KustoClient(kcsb)
        # forward slashes work on both Windows and *nix
        with open(".//KustoQuery//SOCAlerts.csl", "r") as kusto_query:
            query = kusto_query.read()

        try:
            response = client.execute(SOCAlertConsumer.db, query)
        except RuntimeError as r:
            logger.AppLogging.auditlogger.error(
                "There was an error in receiving response from Kusto")
            logger.AppLogging.auditlogger.error(str(r))
            # No response means there is nothing to write.
            return
        logger.AppLogging.auditlogger.info(
            "Successfully received response from Kusto")

        for table in response.primary_results:
            filetime = time.strftime("%Y%m%d-%H%M%S")
            # NOTE(review): mode "x" fails if a file with the same second-level
            # timestamp already exists (e.g. several tables in one response).
            with open(".//output//alerts-" + filetime + ".json",
                      "x") as out_file:
                out_file.write(str(table))

        # Logged only after the files were actually written.
        logger.AppLogging.auditlogger.info(
            "Query output is saved to output folder")
예제 #5
0
                              cslDataType="timespan"))
        mappings.append(
            JsonColumnMapping(columnName="xtextWithNulls",
                              jsonPath="$.xtextWithNulls",
                              cslDataType="string"))
        mappings.append(
            JsonColumnMapping(columnName="xdynamicWithNulls",
                              jsonPath="$.xdynamicWithNulls",
                              cslDataType="dynamic"))
        return mappings


# Module-level fixtures shared by the tests below: a query client, an ingest
# client and a reader for the ingestion status queues.
# NOTE(review): these statements run at import time, including the command below.
client = KustoClient("https://toshetah.kusto.windows.net")
ingest_client = KustoIngestClient("https://ingest-toshetah.kusto.windows.net")
ingest_status_q = KustoIngestStatusQueues(ingest_client)
# Drop the "Deft" table in "PythonTest" if it already exists.
client.execute("PythonTest", ".drop table Deft ifexists")


@pytest.mark.run(order=1)
def test_csv_ingest_non_existing_table():
    """Prepare a CSV ingestion into the "Deft" table of the "PythonTest" database.

    NOTE(review): the visible body only builds the ingestion properties and
    the two input paths; the ingest call and assertions are not shown here.
    """
    # CSV format with the Deft column mappings; report failures and successes.
    csv_ingest_props = IngestionProperties(
        "PythonTest",
        "Deft",
        dataFormat=DataFormat.csv,
        mapping=Helpers.create_deft_table_csv_mappings(),
        reportLevel=ReportLevel.FailuresAndSuccesses,
    )
    # Input files are resolved relative to the current working directory.
    csv_file_path = os.path.join(os.getcwd(), "azure-kusto-ingest", "tests",
                                 "input", "dataset.csv")
    zipped_csv_file_path = os.path.join(os.getcwd(), "azure-kusto-ingest",
                                        "tests", "input", "dataset.csv.gz")
예제 #6
0
    cluster, client_id, PEM, thumbprint, authority_id)

# Create the client from the connection string builder prepared above.
client = KustoClient(kcsb)

# In case you want to authenticate with the logged-in AAD user instead.
# NOTE: this rebinds `client`, replacing the instance created above; keep
# only the variant you need.
client = KustoClient(cluster)

######################################################
##                       QUERY                      ##
######################################################

# once authenticated, usage is as follows
db = "Samples"
query = "StormEvents | take 10"

response = client.execute(db, query)

# iterating over the rows of the first primary result table is possible
for row in response.primary_results[0]:
    # printing specific columns by index
    print("value at 0 {}".format(row[0]))
    print("\n")
    # printing specific columns by name
    print("EventType:{}".format(row["EventType"]))

# tables are serializable, so their string form can be written to a file:
with open("results.json", "w+") as f:
    f.write(str(response.primary_results[0]))

# we also support dataframes:
dataframe = dataframe_from_result_table(response.primary_results[0])
예제 #7
0
        return mappings


# Target cluster, database and table used by the tests below.
cluster = "Dadubovs1.westus"  # "toshetah"
db_name = "TestingDatabase"  # "PythonTest"
table_name = "Deft"

# Separate connection strings for the engine endpoint and the ingestion
# ("ingest-" prefixed) endpoint, both using AAD device authentication.
engine_kcsb = KustoConnectionStringBuilder.with_aad_device_authentication(
    "https://{}.kusto.windows.net".format(cluster))
dm_kcsb = KustoConnectionStringBuilder.with_aad_device_authentication(
    "https://ingest-{}.kusto.windows.net".format(cluster))
client = KustoClient(engine_kcsb)
ingest_client = KustoIngestClient(dm_kcsb)
ingest_status_q = KustoIngestStatusQueues(ingest_client)

# Drop the target table if it already exists.
# NOTE(review): this runs at import time.
client.execute(db_name, ".drop table {} ifexists".format(table_name))


@pytest.mark.run(order=1)
def test_csv_ingest_non_existing_table():
    """Prepare a CSV ingestion into ``table_name`` of ``db_name``.

    NOTE(review): the visible body only builds the ingestion properties and
    the two input paths; the ingest call and assertions are not shown here.
    """
    # CSV format with the Deft column mappings; report failures and successes.
    csv_ingest_props = IngestionProperties(
        db_name,
        table_name,
        dataFormat=DataFormat.csv,
        mapping=Helpers.create_deft_table_csv_mappings(),
        reportLevel=ReportLevel.FailuresAndSuccesses,
    )
    # Input files are resolved relative to the current working directory.
    csv_file_path = os.path.join(os.getcwd(), "azure-kusto-ingest", "tests",
                                 "input", "dataset.csv")
    zipped_csv_file_path = os.path.join(os.getcwd(), "azure-kusto-ingest",
                                        "tests", "input", "dataset.csv.gz")
예제 #8
0
class Kusto_Client(object):
    """
    Wrapper around ``KustoClient`` configured from a connection key/value mapping.

    The authentication method is selected from the keys present in the mapping
    (see ``__init__``). ``execute`` returns the raw response wrapped in a
    ``KqlQueryResponse`` together with the endpoint version ("v1" or "v2")
    associated with the kind of statement that was run.
    """

    _DEFAULT_CLIENTID = "db662dc1-0cfe-4e1c-a843-19a68e65be58"  # kusto client app, don't know app name

    #    _DEFAULT_CLIENTID = "8430759c-5626-4577-b151-d0755f5355d8" # kusto client app, don't know app name

    def __init__(self, conn_kv):
        """
        Kusto Client constructor.

        Parameters
        ----------
        conn_kv : mapping
            Connection settings keyed by ``ConnStrKeys`` members. ``CLUSTER``
            is required and is expanded to
            ``https://<cluster>.kusto.windows.net``. The first matching rule
            picks the authentication method:

            1. ``USERNAME`` and ``PASSWORD`` -> AAD user/password
            2. ``CLIENTID`` and ``CLIENTSECRET`` -> AAD application key
            3. ``CLIENTID``, ``CERTIFICATE`` and ``CERTIFICATE_THUMBPRINT``
               -> AAD application certificate
            4. otherwise -> AAD device authentication

            ``TENANT``, when present, is used as the authority id.
        """
        cluster_url = "https://{0}.kusto.windows.net".format(
            conn_kv[ConnStrKeys.CLUSTER])
        setting = conn_kv.get
        builder = KustoConnectionStringBuilder

        if setting(ConnStrKeys.USERNAME) and setting(ConnStrKeys.PASSWORD):
            kcsb = builder.with_aad_user_password_authentication(
                cluster_url,
                setting(ConnStrKeys.USERNAME),
                setting(ConnStrKeys.PASSWORD))
            tenant = setting(ConnStrKeys.TENANT)
            if tenant is not None:
                kcsb.authority_id = tenant
        elif setting(ConnStrKeys.CLIENTID) and setting(ConnStrKeys.CLIENTSECRET):
            kcsb = builder.with_aad_application_key_authentication(
                cluster_url,
                setting(ConnStrKeys.CLIENTID),
                setting(ConnStrKeys.CLIENTSECRET),
                setting(ConnStrKeys.TENANT))
        elif (setting(ConnStrKeys.CLIENTID)
              and setting(ConnStrKeys.CERTIFICATE)
              and setting(ConnStrKeys.CERTIFICATE_THUMBPRINT)):
            kcsb = builder.with_aad_application_certificate_authentication(
                cluster_url,
                setting(ConnStrKeys.CLIENTID),
                setting(ConnStrKeys.CERTIFICATE),
                setting(ConnStrKeys.CERTIFICATE_THUMBPRINT),
                setting(ConnStrKeys.TENANT),
            )
        else:
            kcsb = builder.with_aad_device_authentication(cluster_url)
            tenant = setting(ConnStrKeys.TENANT)
            if tenant is not None:
                kcsb.authority_id = tenant

        self.client = KustoClient(kcsb)

        # replace aadhelper to use remote browser in interactive mode
        self.client._aad_helper = _MyAadHelper(kcsb, self._DEFAULT_CLIENTID)

        self.mgmt_endpoint_version = self._endpoint_version(
            self.client._mgmt_endpoint)
        self.query_endpoint_version = self._endpoint_version(
            self.client._query_endpoint)

    @staticmethod
    def _endpoint_version(endpoint):
        """Return "v2" when ``endpoint`` ends with ``v2/rest/query``, else "v1"."""
        if endpoint.endswith("v2/rest/query"):
            return "v2"
        return "v1"

    def execute(self,
                kusto_database,
                query,
                accept_partial_results=False,
                timeout=None):
        """ Execute a simple query or management command

        Parameters
        ----------
        kusto_database : str
            Database against query will be executed.
        query : str
            Query to be executed. A statement starting with "." is treated
            as a management command when choosing the endpoint version.
        accept_partial_results : bool
            Optional parameter, forwarded unchanged to the wrapped client.
            Default is False.
        timeout : float, optional
            Optional parameter, forwarded unchanged to the wrapped client.
            Default is None.

        Returns
        -------
        KqlQueryResponse
            The raw response paired with the selected endpoint version.
        """
        if query.startswith("."):
            endpoint_version = self.mgmt_endpoint_version
        else:
            endpoint_version = self.query_endpoint_version

        # Fifth positional argument (get_raw_response) is always True here.
        raw_response = self.client.execute(kusto_database, query,
                                           accept_partial_results, timeout,
                                           True)
        return KqlQueryResponse(raw_response, endpoint_version)
예제 #9
0
# Please note that if you choose this option, you'll need to authenticate for every new instance that is initialized.
# It is highly recommended to create one instance and use it for all of your queries.
kcsb = KustoConnectionStringBuilder.with_aad_device_authentication(cluster)

# The authentication method will be taken from the chosen KustoConnectionStringBuilder.
client = KustoClient(kcsb)

######################################################
##                       QUERY                      ##
######################################################

# once authenticated, usage is as follows
db = "Samples"
query = "StormEvents | take 10"

response = client.execute(db, query)

# iterating over the rows of the first primary result table is possible
for row in response.primary_results[0]:
    # printing specific columns by index
    print("value at 0 {}".format(row[0]))
    print("\n")
    # printing specific columns by name
    print("EventType:{}".format(row["EventType"]))

# tables are serializable, so their string form can be written to a file:
with open("results.json", "w+") as f:
    f.write(str(response.primary_results[0]))

# we also support dataframes:
dataframe = dataframe_from_result_table(response.primary_results[0])
예제 #10
0
                              jsonPath="$.xtextWithNulls",
                              cslDataType="string"))
        mappings.append(
            JsonColumnMapping(
                columnName="xdynamicWithNulls",
                jsonPath="$.xdynamicWithNulls",
                cslDataType="dynamic",
            ))
        return mappings


# Query client and ingest client for the same cluster (the ingest endpoint
# uses the "ingest-" host prefix).
KUSTO_CLIENT = KustoClient("https://toshetah.kusto.windows.net")
KUSTO_INGEST_CLIENT = KustoIngestClient(
    "https://ingest-toshetah.kusto.windows.net")

# Drop the "Deft" table in "PythonTest" if it already exists.
KUSTO_CLIENT.execute("PythonTest", ".drop table Deft ifexists")

# Sanity test - ingest from csv to a non-existing table
CSV_INGESTION_PROPERTIES = IngestionProperties(
    "PythonTest",
    "Deft",
    dataFormat=DataFormat.csv,
    mapping=Helpers.create_deft_table_csv_mappings(),
)
# Input files are resolved relative to the current working directory.
CSV_FILE_PATH = os.path.join(os.getcwd(), "azure-kusto-ingest", "tests",
                             "input", "dataset.csv")
ZIPPED_CSV_FILE_PATH = os.path.join(os.getcwd(), "azure-kusto-ingest", "tests",
                                    "input", "dataset.csv.gz")
# Ingest the plain and the gzipped file in one call.
# NOTE(review): the meaning of the positional `False` is not visible here -
# confirm against the ingest client's signature.
KUSTO_INGEST_CLIENT.ingest_from_multiple_files(
    [CSV_FILE_PATH, ZIPPED_CSV_FILE_PATH], False, CSV_INGESTION_PROPERTIES)
예제 #11
0
# Read the PEM text used for certificate authentication.
with open(FILENAME, "r") as pem_file:
    PEM = pem_file.read()

# Placeholder - replace with the thumbprint of the certificate read above.
THUMBPRINT = "certificate's thumbprint"
KCSB = KustoConnectionStringBuilder.with_aad_application_certificate_authentication(
    KUSTO_CLUSTER, CLIENT_ID, PEM, THUMBPRINT)

KUSTO_CLIENT = KustoClient(KCSB)

# In case you want to authenticate with the logged-in AAD user instead.
# NOTE: this rebinds KUSTO_CLIENT, replacing the certificate-based client above.
KUSTO_CLIENT = KustoClient(KUSTO_CLUSTER)

KUSTO_DATABASE = "Samples"
KUSTO_QUERY = "StormEvents | take 10"

# Print the first column and the "EventType" column of every returned row.
RESPONSE = KUSTO_CLIENT.execute(KUSTO_DATABASE, KUSTO_QUERY)
for row in RESPONSE.primary_results[0]:
    print(row[0], " ", row["EventType"])

# Query is too big to be executed
KUSTO_QUERY = "StormEvents"
try:
    RESPONSE = KUSTO_CLIENT.execute(KUSTO_DATABASE, KUSTO_QUERY)
except KustoServiceError as error:
    # Show the error and the details it exposes, including partial results.
    print("2. Error:", error)
    print("2. Is semantic error:", error.is_semantic_error())
    print("2. Has partial results:", error.has_partial_results())
    print("2. Result size:", len(error.get_partial_results()))

RESPONSE = KUSTO_CLIENT.execute(KUSTO_DATABASE,
                                KUSTO_QUERY,
예제 #12
0
    TIMESTAMP,
    NodeName,
    redo_queue_size_mb = redo_queue_size /1024.,
    redo_rate,
    failover_time_sec = redo_queue_size*1.0/redo_rate
| order by TIMESTAMP asc
| summarize redo_queue_list = make_list(redo_queue_size_mb), time_series = make_list(TIMESTAMP), failover_time_sec_list = make_list(failover_time_sec) by NodeName
| extend redo_diff_mb= series_fir(redo_queue_list, dynamic([1,-1]), false, false)
| extend redo_stats = series_stats_dynamic(redo_diff_mb)
| extend failover_time_smooth_secs = series_fir(failover_time_sec_list, dynamic ([1,1,1]), true, true) //normalize= true, center= true
| extend failover_time_stats = series_stats_dynamic(failover_time_smooth_secs)
| project redo_stats.avg, failover_time_stats.max'''

try:
    print("querying")
    failover_response = kusto_client.execute("sqlazure1", query)
    print("response")
    print(failover_response)

    failover_df = dataframe_from_result_table(
        failover_response.primary_results[0])
    print("dataframe")
    print(failover_df)

    print(failover_df['redo_stats_avg'])

    redo_stats_avg = failover_df['redo_stats_avg'].mean()
    failover_time_stats_max = failover_df['failover_time_stats_max'].max()

    a = (redo_stats_avg, failover_time_stats_max)
except:
예제 #13
0
class Kusto_Client(object):
    """
    Wrapper around ``KustoClient`` that picks an authentication method from
    the credentials it is given.

    ``execute`` returns the raw response wrapped in a ``KqlResponse`` together
    with the endpoint version ("v1" or "v2") associated with the kind of
    statement that was run.

    Examples
    --------
    Application key authentication:

    >>> kusto_cluster = 'https://help.kusto.windows.net'
    >>> kusto_client = Kusto_Client(kusto_cluster, client_id, client_secret='your_app_secret')

    User/password authentication:

    >>> kusto_client = Kusto_Client(kusto_cluster, username='******', password='******')"""
    def __init__(
        self,
        kusto_cluster,
        client_id=None,
        client_secret=None,
        username=None,
        password=None,
        certificate=None,
        certificate_thumbprint=None,
        authority=None,
    ):
        """
        Kusto Client constructor.

        The first matching rule picks the authentication method:
        username + password, then client_id + client_secret, then
        client_id + certificate + certificate_thumbprint, otherwise AAD
        device authentication.

        Parameters
        ----------
        kusto_cluster : str
            Kusto cluster endpoint. Example: https://help.kusto.windows.net
        client_id : str
            The AAD application ID of the application making the request to Kusto
        client_secret : str
            The AAD application key of the application making the request to Kusto.
        username : str
            The username of the user making the request to Kusto.
        password : str
            The password matching the username of the user making the request to Kusto
        certificate : str
            Certificate passed to the certificate-authentication builder.
        certificate_thumbprint : str
            Thumbprint passed to the certificate-authentication builder.
        authority : str, optional
            When truthy, set as ``authority_id`` on the connection string.
        """
        builder = KustoConnectionStringBuilder

        if username and password:
            kcsb = builder.with_aad_user_password_authentication(
                kusto_cluster, username, password)
        elif client_id and client_secret:
            kcsb = builder.with_aad_application_key_authentication(
                kusto_cluster, client_id, client_secret)
        elif client_id and certificate and certificate_thumbprint:
            kcsb = builder.with_aad_application_certificate_authentication(
                kusto_cluster, client_id, certificate, certificate_thumbprint)
        else:
            kcsb = builder.with_aad_device_authentication(kusto_cluster)

        if authority:
            kcsb.authority_id = authority

        self.client = KustoClient(kcsb)

        # replace aadhelper to use remote browser in interactive mode
        self.client._aad_helper = _MyAadHelper(kcsb)

        self.mgmt_endpoint_version = self._endpoint_version(
            self.client._mgmt_endpoint)
        self.query_endpoint_version = self._endpoint_version(
            self.client._query_endpoint)

    @staticmethod
    def _endpoint_version(endpoint):
        """Return "v2" when ``endpoint`` ends with ``v2/rest/query``, else "v1"."""
        if endpoint.endswith("v2/rest/query"):
            return "v2"
        return "v1"

    def execute(self,
                kusto_database,
                query,
                accept_partial_results=False,
                timeout=None):
        """ Execute a simple query or management command

        Parameters
        ----------
        kusto_database : str
            Database against query will be executed.
        query : str
            Query to be executed. A statement starting with "." is treated
            as a management command when choosing the endpoint version.
        accept_partial_results : bool
            Optional parameter, forwarded unchanged to the wrapped client.
            Default is False.
        timeout : float, optional
            Optional parameter, forwarded unchanged to the wrapped client.
            Default is None.

        Returns
        -------
        KqlResponse
            The raw response paired with the selected endpoint version.
        """
        if query.startswith("."):
            endpoint_version = self.mgmt_endpoint_version
        else:
            endpoint_version = self.query_endpoint_version

        # Fifth positional argument (get_raw_response) is always True here.
        raw_response = self.client.execute(kusto_database, query,
                                           accept_partial_results, timeout,
                                           True)
        return KqlResponse(raw_response, endpoint_version)
class CaseTitlesFetcher:
    """Fetch support-case titles from Kusto, cleanse them and merge the result
    into ``SampleUtterances.json``.

    The entry point is ``fetchCaseTitles``; the remaining public methods are
    the text helpers and pipeline stages it relies on.
    """

    def __init__(self, trainingConfig, trainingId):
        """Create the Kusto client and load the phrase lists from ``metadata/``.

        ``trainingConfig`` supplies the feature switches and ratios read by the
        other methods; ``trainingId`` names the log file (``<trainingId>.log``).
        """
        self.trainingId = trainingId
        cluster = "https://usage360.kusto.windows.net"
        authority_id = "72f988bf-86f1-41af-91ab-2d7cd011db47"
        client_id = kustoClientId
        client_secret = kustoClientSecret
        kcsb = KustoConnectionStringBuilder.with_aad_application_key_authentication(cluster, client_id, client_secret, authority_id)
        self.kustoClient = KustoClient(kcsb)
        self.garbageList = self._readPhraseFile("metadata/garbagePhrases.txt")
        self.striptrailers = self._readPhraseFile("metadata/stripTrailers.txt")
        self.shortPhrases = self._readPhraseFile("metadata/shortPhrasesList.txt")
        self.trainingConfig = trainingConfig

    @staticmethod
    def _readPhraseFile(path):
        """Return the whitespace-stripped lines of the text file at ``path``."""
        with open(path, "r") as phraseFile:
            return [line.strip() for line in phraseFile]

    @staticmethod
    def _writeJson(path, payload):
        """Serialize ``payload`` with a 4-space indent and write it to ``path``."""
        with open(path, "w") as jsonFile:
            jsonFile.write(json.dumps(payload, indent=4))

    @staticmethod
    def _asLinkList(links):
        """Return ``links`` as a list; a bare string becomes a one-item list."""
        if isinstance(links, str):
            return [links]
        return list(links)

    def endSentence(self, sent):
        """Return ``sent`` terminated by a period (an empty string becomes ".")."""
        if not sent.endswith("."):
            return sent+"."
        return sent

    def squeeze(self, sent):
        """Return ``sent`` without trailing periods and without spaces."""
        # rstrip also copes with empty and all-period input, where indexing
        # sent[-1] would raise IndexError.
        return sent.rstrip(".").replace(" ", "")

    def isEnglish(self, s):
        """Return True when ``str(s)`` consists of ASCII characters only."""
        s = str(s)
        try:
            s.encode(encoding='utf-8').decode('ascii')
        except UnicodeError:
            # Covers both non-ASCII text and text that cannot be encoded at all.
            return False
        else:
            return True

    def stripTrails(self, s):
        """Lower-case and strip ``s``, then repeatedly remove any configured
        trailer from its end until none matches."""
        s = str(s).lower().strip()
        # Empty trailers match every string and would never make progress.
        trailers = [tr.lower() for tr in self.striptrailers if tr]
        removed = True
        while removed:
            removed = False
            for tr in trailers:
                if s.endswith(tr):
                    s = s[:-len(tr)].strip()
                    removed = True
                    break
        return s

    def pipeCleansing(self, s):
        """Strip trailers from ``s`` and return the text after its last "|"."""
        s = self.stripTrails(s)
        return s.split("|")[-1].lstrip()

    def extractor(self, key, group):
        """Build the title entries for one (L2, L3) support-topic category.

        ``key`` is the two-item group key and ``group`` the matching DataFrame
        rows (columns ``CleanCaseTitles`` and ``SupportCenterCaseLink``).
        Returns a list of ``{"text", "links", "category"}`` dicts where
        ``links`` is always a list. With extraction enabled and more than ten
        titles, only the sentences returned by ``retrieveSentences`` that can
        be matched back to a case are kept; otherwise every title is returned.
        """
        logFile = "{0}.log".format(self.trainingId)
        category = key[0]+"--"+key[1]
        lines = [(self.endSentence(row["CleanCaseTitles"]), row["SupportCenterCaseLink"]) for _, row in group.iterrows()]

        if not (self.trainingConfig.runExtractionEnabled and len(lines)>10):
            loggerInstance.logToFile(logFile, "Extractor: Disabled or not enough lines for summarization")
            # Wrap the single link in a list so both branches produce the
            # same shape and merging never iterates over a string.
            return [{"text": text, "links": [link], "category": category} for text, link in lines]

        numsentences = group.shape[0]
        loggerInstance.logToFile(logFile, "Extractor: Running extractor on category " + category + " containing " + str(numsentences) + " case titles")
        doc = " ".join([text for text, _ in lines])
        keysentences = retrieveSentences(doc, max([10, int(numsentences*self.trainingConfig.extractionRatio)])*10)
        loggerInstance.logToFile(logFile, "Extractor: Extracted " + str(len(keysentences)) + " sentences.")

        # Normalize every title once instead of once per key sentence.
        squeezedLines = [(self.squeeze(text), link) for text, link in lines]
        alnumLines = None  # built lazily; only the last fallback needs it
        resultTitles = []
        for sent in keysentences:
            squeezedSent = self.squeeze(sent)
            if not squeezedSent:
                # An empty sentence would match every title as a substring.
                continue
            caselinks = [link for squeezed, link in squeezedLines if squeezed==squeezedSent]
            if not caselinks:
                caselinks = [link for squeezed, link in squeezedLines if squeezedSent in squeezed]
            if not caselinks:
                if alnumLines is None:
                    alnumLines = [(re.sub('[^0-9a-zA-Z]+', '', text), link) for text, link in lines]
                alnumSent = re.sub('[^0-9a-zA-Z]+', '', sent)
                caselinks = [link for alnum, link in alnumLines if alnum==alnumSent]
            if caselinks:
                resultTitles.append({"text": sent, "links": caselinks, "category": category})
        return resultTitles

    def _mergeIncidentTitles(self, existing, extracted):
        """Merge ``extracted`` entries into the ``existing`` list in place.

        Entries are matched on ``text``: a match has its links unioned, a new
        text is appended. Matching uses an index rather than a scan that stops
        early, because ``existing`` is not guaranteed to be sorted.
        """
        byText = {}
        for entry in existing:
            byText.setdefault(entry["text"], entry)
        for item in extracted:
            match = byText.get(item["text"])
            if match is None:
                existing.append(item)
                byText[item["text"]] = item
            else:
                merged = set(self._asLinkList(match["links"]))
                merged.update(self._asLinkList(item["links"]))
                # Sorted so the written file is stable between runs.
                match["links"] = sorted(merged)

    def runCaseTitlesExtraction(self, df, productid, datapath):
        """Extract titles from ``df`` and merge them into
        ``<datapath>/SampleUtterances.json``.

        ``df`` may be ``None`` or empty, in which case nothing is extracted.
        If the JSON file does not exist it is created with the extracted
        titles and an empty ``stackoverflowtitles`` list. ``productid`` is
        accepted but not used. Note that ``df`` is modified in place (missing
        support topics are filled with "NOSELECTION").
        """
        logFile = "{0}.log".format(self.trainingId)
        results = []
        # `df.any` (without a call) was always truthy; test for real content.
        if self.trainingConfig.downloadCaseTitlesEnabled and df is not None and not df.empty:
            topicColumns = ["Incidents_SupportTopicL2Current", "Incidents_SupportTopicL3Current"]
            for column in topicColumns:
                df[column] = df[column].fillna("NOSELECTION")
            groups = list(df.groupby(topicColumns))
            loggerInstance.logToFile(logFile, "RunCaseTitlesExtraction: Processing " + str(df.shape[0]) + " case titles across " + str(len(groups)) + " categories")
            results = sorted(itertools.chain.from_iterable(self.extractor(key, group) for key, group in groups), key=lambda x: x["text"])

        samplePath = os.path.join(datapath, "SampleUtterances.json")
        try:
            with open(samplePath, "r") as sampleFile:
                sampleUtterances = json.load(sampleFile)
        except FileNotFoundError:
            loggerInstance.logToFile(logFile, "[ERROR]RunCaseTitlesExtraction: File SampleUtterances.json does not exist, creating new file.")
            self._writeJson(samplePath, {"incidenttitles" : results, "stackoverflowtitles": []})
            return

        self._mergeIncidentTitles(sampleUtterances["incidenttitles"], results)
        self._writeJson(samplePath, sampleUtterances)
        loggerInstance.logToFile(logFile, "RunCaseTitlesExtraction: Successfully written extracted case titles to file SampleUtterances.json")

    def _cleanCaseTitles(self, df):
        """Return the rows of ``df`` whose titles survive cleansing.

        Keeps ASCII-only titles longer than three characters, reduces each
        title via ``pipeCleansing``, removes bracketed content, then drops
        garbage phrases and titles with fewer than two words of three or more
        characters (unless listed as a short phrase). The cleaned title is
        returned in the ``CleanCaseTitles`` column.
        """
        logFile = "{0}.log".format(self.trainingId)
        cleansed = "Incidents_Title_PipeCleansed"
        loggerInstance.logToFile(logFile, "DataCleansing: " + str(df.shape[0]) + " incidents fetched")

        #Remove all non english cases
        englishMask = df["Incidents_Title"].map(self.isEnglish).astype(bool)
        df_eng = df[englishMask]
        loggerInstance.logToFile(logFile, "DataCleansing: " + str(df.shape[0] - df_eng.shape[0]) + " non English language cases removed")

        #Remove all cases with character length 3 or less; copy so the new
        #column is added to an independent frame, not to a slice of df
        df_titles = df_eng[df_eng["Incidents_Title"].str.len()>3].copy()

        #Extract case title from piped sentences, then remove any content in square brackets
        df_titles[cleansed] = df_titles["Incidents_Title"].map(self.pipeCleansing).map(lambda x: re.sub(r"[\[].*?[\]]", "", x))

        #Remove any remaining titles with character length 3 or less
        df_titles = df_titles[df_titles[cleansed].str.len()>3]

        #Remove any garbage phrases (defined in garbage list)
        df_clean = df_titles[~df_titles[cleansed].isin(self.garbageList)]
        loggerInstance.logToFile(logFile, "DataCleansing: " + str(df_eng.shape[0] - df_clean.shape[0]) + " garbage case title incidents removed")

        #Remove any cases with two or less words (Except for short phrases that make sense)
        titles = df_clean[cleansed]
        wordcount = titles.map(lambda x: len([a for a in x.split() if len(a)>2]))
        keep = titles.isin(self.shortPhrases) | (wordcount>=2)
        df_clean = df_clean[keep].rename(columns={cleansed: "CleanCaseTitles"})

        loggerInstance.logToFile(logFile, "DataCleansing: " + str(df_clean.shape[0]) + " incidents will be processed for summarization")
        return df_clean

    def fetchCaseTitles(self, productid, datapath):
        """Download, cleanse and store the case titles for ``productid``.

        When ``downloadCaseTitlesEnabled`` is off, only the merge step runs
        (with no new titles). Every stage failure is re-raised as
        ``TrainingException`` with a stage prefix and the original exception
        attached as its cause.
        """
        trainingId = self.trainingId
        if not self.trainingConfig.downloadCaseTitlesEnabled:
            loggerInstance.logToFile("{0}.log".format(trainingId), "CaseTitleExtraction: Disabled")
            try:
                self.runCaseTitlesExtraction(None, productid, datapath)
            except Exception as e:
                raise TrainingException("CaseTitleExtraction: " + str(e)) from e
            return

        ndays = int(self.trainingConfig.caseTitlesDaysSince)
        try:
            db = "Product360"
            # NOTE(review): productid is interpolated into the query text
            # unescaped - confirm it never comes from untrusted input.
            query = """cluster('usage360').database('Product360').
            AllCloudSupportIncidentDataWithP360MetadataMapping
            | where DerivedProductIDStr in ('{0}')
            | where Incidents_CreatedTime >= ago({1}d)
            | summarize IncidentTime = any(Incidents_CreatedTime) by Incidents_IncidentId , Incidents_Severity , Incidents_ProductName , Incidents_SupportTopicL2Current , Incidents_SupportTopicL3Current, Incidents_Title  
            | extend SupportCenterCaseLink = strcat('https://azuresupportcenter.msftcloudes.com/caseoverview?srId=', Incidents_IncidentId)
            | order by Incidents_SupportTopicL3Current asc""".format(productid, ndays)
            response = self.kustoClient.execute(db, query)
        except Exception as e:
            raise TrainingException("KustoFetcher: " + str(e)) from e

        try:
            df = dataframe_from_result_table(response.primary_results[0])
            df_eng_clean = self._cleanCaseTitles(df)
        except Exception as e:
            raise TrainingException("DataCleansing: " + str(e)) from e

        try:
            self.runCaseTitlesExtraction(df_eng_clean, productid, datapath)
        except Exception as e:
            raise TrainingException("CaseTitleExtraction: " + str(e)) from e
예제 #15
0
import pandas
from azure.kusto.data.exceptions import KustoServiceError
from azure.kusto.data.helpers import dataframe_from_result_table
from azure.kusto.data.request import KustoClient, KustoConnectionStringBuilder, ClientRequestProperties

# Switch between reusing earlier output and querying Kusto again.
use_cache = False

if use_cache:
    print('using cache')

else:
    cluster = "https://icmcluster.kusto.windows.net"
    output_path = './outputs'
    os.makedirs(output_path, exist_ok=True)

    # It is highly recommended to create one instance and use it for all of your queries.
    kcsb = KustoConnectionStringBuilder.with_aad_device_authentication(cluster)

    # The authentication method will be taken from the chosen KustoConnectionStringBuilder.
    client = KustoClient(kcsb)
    db = "IcmDataWarehouse"
    # Read the query text and close the file straight away (the handle was
    # previously left open).
    with open("kustoicmquery.txt", "r") as query_file:
        query = query_file.read()
    file_name = os.path.join(output_path, 'ICM.csv')

    response = client.execute(db, query)

    # Convert the first primary result table to a dataframe and save it as CSV.
    df_incidents = dataframe_from_result_table(response.primary_results[0])

    df_incidents.to_csv(file_name)
예제 #16
0
from azure.kusto.data.request import KustoClient, KustoConnectionStringBuilder
import pandas as pd

# Tuning constants. NOTE(review): their use is not visible in this part of
# the file - confirm meaning and units against the code that reads them.
LIMIT_BENCHMARK_RUNS = 30
EXPECTED_DURATION_OF_SLEEPING_IN_MINUTES = 10

# Kusto client using AAD device authentication against the cluster below.
cluster_name = "https://sqlazureweu2.kustomfa.windows.net"
kustoStringBuilder = KustoConnectionStringBuilder.with_aad_device_authentication(
    cluster_name)
kusto_client = KustoClient(kustoStringBuilder)
# Connection obtained from CLperfDB; created at import time.
connection = CLperfDB.connect()

if __name__ == "__main__":

    #make kusto client fakely
    fake_response = kusto_client.execute("sqlazure1", "MonBackup|take 1")

    number_of_scheduled_benchmarks = 0
    instances_json = open('Configurations/instances.json', 'r+').read()
    instances = json.loads(instances_json)

    #for all jobs
    successful_runs = []
    results = []
    failed_runs = []
    instance_occupation = {}

    for instance_info in instances:
        instance_occupation[instance_info['instance_name']] = -1

    pending_runs = []