Ejemplo n.º 1
def queryAvgValuebyHost(idx, fromDate, toDate):
    """Return daily average values per host, for source and destination hosts.

    One aggregation is run per host field (``src_host`` and ``dest_host``).
    Documents whose ``timestamp`` lies within ``[fromDate, toDate]`` are
    grouped by host and then by calendar day, and the value field returned by
    ``hp.getValueField(idx)`` is averaged within every day.

    Args:
        idx: Name of the index to search.
        fromDate: Inclusive lower bound for ``timestamp``.
        toDate: Inclusive upper bound for ``timestamp``.

    Returns:
        dict: ``{'src_host': rows, 'dest_host': rows}``; every row is a dict
        with the keys ``'host'``, ``'period'`` and the value-field name.
    """
    val_fld = hp.getValueField(idx)

    def runQuery(fld):
        """Run the per-host daily-average aggregation grouped on ``fld``."""
        query = {
            "size": 0,
            "query": {
                "bool": {
                    "must": [
                        {
                            "range": {
                                "timestamp": {
                                    "gte": fromDate,
                                    "lte": toDate
                                }
                            }
                        }
                    ]
                }
            },
            "aggs": {
                "host": {
                    "terms": {
                        "field": fld,
                        "size": 9999
                    },
                    "aggs": {
                        "period": {
                            "date_histogram": {
                                "field": "timestamp",
                                "calendar_interval": "day"
                            },
                            "aggs": {
                                val_fld: {
                                    "avg": {
                                        "field": val_fld
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }

        return hp.es.search(index=idx, body=query)

    result = {}
    for ft in ['src_host', 'dest_host']:
        data = runQuery(ft)
        # Flatten host -> day buckets into one row per (host, day).
        result[ft] = [
            {'host': host['key'], 'period': period['key'],
             val_fld: period[val_fld]['value']}
            for host in data['aggregations']['host']['buckets']
            for period in host['period']['buckets']
        ]
    return result
Ejemplo n.º 2
    def getData(self, src, dest):
        """Load all values for the ``src``/``dest`` pair into a DataFrame.

        The value column of ``self._idx`` is renamed to ``'value'``. When the
        frame has rows, two derived columns are added: ``'log_value'`` (natural
        log, with zeros turned into NaN first so the log is defined) and
        ``'sqrt'`` (square root of the value).

        Args:
            src: Source passed through to ``qrs.queryAllValues``.
            dest: Destination passed through to ``qrs.queryAllValues``.

        Returns:
            pandas.DataFrame: The queried rows plus the derived columns.
        """
        # NOTE(review): the second argument was cut off in the original text;
        # ``dateTo`` is assumed as the counterpart of ``dateFrom`` - confirm.
        time_list = hp.GetTimeRanges(self.root_parent.dateFrom,
                                     self.root_parent.dateTo)

        df = pd.DataFrame(qrs.queryAllValues(self._idx, src, dest, time_list))
        df = df.rename(columns={hp.getValueField(self._idx): 'value'})
        if len(df) > 0:
            # log(0) is undefined, so zeros become NaN before taking the log.
            df['log_value'] = np.log(df['value'].replace(0, np.nan))
            df['sqrt'] = df['value']**(1 / 2)
        return df
Ejemplo n.º 3
    def getValues(self, probdf):
        """Collect the values of every (src, dest, idx) row of ``probdf``.

        For each row the values are queried, tagged with ``'idx'``, ``'hash'``
        (``"<src>-<dest>"``), ``'src'`` and ``'dest'``, and the index-specific
        value column is renamed to ``'value'``.

        Args:
            probdf: DataFrame providing the columns ``'src'``, ``'dest'`` and
                ``'idx'``.

        Returns:
            pandas.DataFrame: All per-row frames concatenated with a fresh
            integer index. When ``probdf`` has no rows, an empty frame with
            the columns ``timestamp``, ``value``, ``idx`` and ``hash``.
        """
        # The empty seed frame fixes the leading column order and guarantees a
        # well-formed result when there is nothing to query.
        frames = [pd.DataFrame(columns=['timestamp', 'value', 'idx', 'hash'])]
        time_list = hp.GetTimeRanges(self.dateFrom, self.dateTo)
        for item in probdf[['src', 'dest', 'idx']].values:
            src, dest, idx = item[0], item[1], item[2]
            # NOTE(review): argument layout kept exactly as in the original
            # call; confirm it matches the qrs.queryAllValues signature in use.
            tempdf = pd.DataFrame(
                qrs.queryAllValues(idx, item, time_list[0], time_list[1]))
            tempdf['idx'] = idx
            tempdf['hash'] = src + "-" + dest
            tempdf['src'] = src
            tempdf['dest'] = dest
            # NOTE(review): the tail of this call was cut off in the original
            # text; an in-place rename is assumed.
            tempdf.rename(columns={hp.getValueField(idx): 'value'},
                          inplace=True)
            frames.append(tempdf)

        # One concat instead of DataFrame.append per iteration: append was
        # removed in pandas 2.0 and copied the whole frame on every call.
        return pd.concat(frames, ignore_index=True)
Ejemplo n.º 4
def queryAllValuesFromList(idx, fld_type, val_list, period):
    """Scroll through all documents whose ``fld_type`` is in ``val_list``.

    Only documents with ``timestamp`` inside ``[period[0], period[1]]`` are
    matched, and only ``timestamp``, ``dest``, ``src`` and the value field of
    ``idx`` are requested from the server.

    Args:
        idx: Name of the index to scan.
        fld_type: Field name used in the ``terms`` filter.
        val_list: Accepted values for ``fld_type``.
        period: Two-item sequence with the inclusive lower and upper
            ``timestamp`` bounds.

    Returns:
        list: One entry per hit returned by the scan.
    """
    val_fld = hp.getValueField(idx)
    query = {
        "size": 0,
        "_source": {
            "includes": ["timestamp", "dest", "src", val_fld]
        },
        "query": {
            "bool": {
                "must": [
                    {
                        "range": {
                            "timestamp": {
                                "gte": period[0],
                                "lte": period[1]
                            }
                        }
                    },
                    {
                        "terms": {
                            fld_type: val_list
                        }
                    }
                ]
            }
        }
    }
    # NOTE(review): this call uses a bare ``es`` client while the rest of the
    # code goes through ``hp.es`` - confirm ``es`` is defined at module level.
    data = scan(client=es, index=idx, query=query, _source=["timestamp", "dest", "src", val_fld], filter_path=['_scroll_id', '_shards', 'hits.hits._source'])

    # NOTE(review): the loop body was missing from the original text and
    # ``allData`` was never defined; collecting each hit's ``_source`` is the
    # assumed intent - confirm against callers.
    allData = []
    for count, res in enumerate(data):
        # Progress indicator for long scans.
        if not count % 100000:
            print(count)
        allData.append(res['_source'])

    return allData
Ejemplo n.º 5
def PairAverageValuesQuery(idx, time_from, time_to, args):
    """Return 30-minute average values for a source/destination host pair.

    Every key of ``hp.hosts`` whose value equals the requested source (or
    destination) is treated as an alias of that host, and the query is run
    for each source-alias/destination-alias combination.

    Args:
        idx: Name of the index to search.
        time_from: Lower ``timestamp`` bound (sent as ``from``).
        time_to: Upper ``timestamp`` bound (sent as ``to``).
        args: Sequence whose first two items are the source and destination
            host.

    Returns:
        list: Dicts with the keys ``'ipv6'``, ``'ts'``, the value-field name
        and ``'doc_count'``, one per 30-minute bucket.
    """
    src = args[0]
    dest = args[1]

    field = hp.getValueField(idx)

    def runQuery(src_value, dest_value):
        """Query one concrete src_host/dest_host combination."""
        query = {
            "size": 0,
            "query": {
                "bool": {
                    "must": [
                        {
                            "term": {
                                "src_host": {
                                    "value": src_value
                                }
                            }
                        },
                        {
                            "term": {
                                "dest_host": {
                                    "value": dest_value
                                }
                            }
                        },
                        {
                            "range": {
                                "timestamp": {
                                    "from": time_from,
                                    "to": time_to
                                }
                            }
                        }
                    ]
                }
            },
            "_source": False,
            "aggregations": {
                "groupby": {
                    "composite": {
                        "size": 10000,
                        "sources": [
                            {
                                "src_host": {
                                    "terms": {
                                        "field": "src_host",
                                        "missing_bucket": True,
                                        "order": "asc"
                                    }
                                }
                            },
                            {
                                "dest_host": {
                                    "terms": {
                                        "field": "dest_host",
                                        "missing_bucket": True,
                                        "order": "asc"
                                    }
                                }
                            },
                            {
                                "ipv6": {
                                    "terms": {
                                        "field": "ipv6",
                                        "missing_bucket": True,
                                        "order": "asc"
                                    }
                                }
                            }
                        ]
                    },
                    "aggregations": {
                        "ts": {
                            "date_histogram": {
                                "field": "timestamp",
                                "fixed_interval": "30m"
                            },
                            "aggs": {
                                field: {
                                    "avg": {
                                        "field": field
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }

        results = hp.es.search(index=idx, body=query)
        return [
            {'ipv6': item['key']['ipv6'], 'ts': p['key'],
             field: p[field]['value'], 'doc_count': p['doc_count']}
            for item in results["aggregations"]["groupby"]["buckets"]
            for p in item['ts']['buckets']
        ]

    # In case an IP was replaced by the host name at a previous step we need
    # to find all possible values for the given hosts and run the query with
    # each combination of the two.
    # NOTE(review): the bodies of the two ``if`` statements and of the final
    # ``if len(output) > 0`` were missing from the original text; collecting
    # the matching keys and extending ``data`` is the assumed intent.
    src_items, dest_items = [], []
    for k, v in hp.hosts.items():
        if src == v:
            src_items.append(k)
        if dest == v:
            dest_items.append(k)

    data = []
    for src_key, dest_key in itertools.product(src_items, dest_items):
        # Extending with an empty result is a no-op, so no length check.
        data.extend(runQuery(src_key, dest_key))

    return data
Ejemplo n.º 6
def GetPairsForAHostV1(idx, time_from, time_to, args):
    """Return the average value of every host pair that involves a host.

    Matches documents where ``src_host`` or ``dest_host`` equals the host and
    whose ``timestamp`` falls within the given range, groups them by
    (src_host, dest_host) and averages the value field of ``idx``.

    Args:
        idx: Name of the index to search.
        time_from: Lower ``timestamp`` bound (sent as ``from``).
        time_to: Upper ``timestamp`` bound (sent as ``to``).
        args: Sequence whose first item is the host to look up.

    Returns:
        list: Dicts with the keys ``'src_host'``, ``'dest_host'`` and the
        value-field name, one per composite bucket.
    """
    host = args[0]
    field = hp.getValueField(idx)
    print(host, field, idx, time_from, time_to)
    query = {
        "size": 0,
        "query": {
            "bool": {
                "must": [
                    {
                        "bool": {
                            "should": [
                                {
                                    "term": {
                                        "src_host": {
                                            "value": host
                                        }
                                    }
                                },
                                {
                                    "term": {
                                        "dest_host": {
                                            "value": host
                                        }
                                    }
                                }
                            ]
                        }
                    },
                    {
                        "range": {
                            "timestamp": {
                                "from": time_from,
                                "to": time_to
                            }
                        }
                    }
                ]
            }
        },
        "_source": False,
        "aggregations": {
            "groupby": {
                "composite": {
                    "size": 9999,
                    "sources": [
                        {
                            "src_host": {
                                "terms": {
                                    "field": "src_host",
                                    "missing_bucket": True,
                                    "order": "asc"
                                }
                            }
                        },
                        {
                            "dest_host": {
                                "terms": {
                                    "field": "dest_host",
                                    "missing_bucket": True,
                                    "order": "asc"
                                }
                            }
                        }
                    ]
                },
                "aggregations": {
                    field: {
                        "avg": {
                            "field": field
                        }
                    }
                }
            }
        }
    }

    # Emit the query with double quotes so it can be pasted as JSON.
    print(str(query).replace("\'", "\""))
    results = hp.es.search(index=idx, body=query)
    return [
        {'src_host': item['key']['src_host'],
         'dest_host': item['key']['dest_host'],
         field: item[field]['value']}
        for item in results["aggregations"]["groupby"]["buckets"]
    ]
Ejemplo n.º 7
def GetPairsForAHost(idx, time_from, time_to, args):
    """Return average values of all host pairs involving a host or its aliases.

    Every key of ``hp.hosts`` whose value equals the requested host is queried
    separately: documents where ``src_host`` or ``dest_host`` equals that key
    and whose ``timestamp`` falls in the given range are grouped by
    (src_host, dest_host) and the value field of ``idx`` is averaged.

    Args:
        idx: Name of the index to search.
        time_from: Lower ``timestamp`` bound (sent as ``from``).
        time_to: Upper ``timestamp`` bound (sent as ``to``).
        args: Sequence whose first item is the host to look up.

    Returns:
        list: Dicts with the keys ``'src_host'``, ``'dest_host'`` and the
        value-field name, accumulated over all matching keys.
    """
    host = args[0]
    field = hp.getValueField(idx)
    print(host, field, idx, time_from, time_to)

    def runQuery(host_value):
        """Query the pairs for one concrete host value."""
        query = {
            "size": 0,
            "query": {
                "bool": {
                    "must": [
                        {
                            "bool": {
                                "should": [
                                    {
                                        "term": {
                                            "src_host": {
                                                "value": host_value
                                            }
                                        }
                                    },
                                    {
                                        "term": {
                                            "dest_host": {
                                                "value": host_value
                                            }
                                        }
                                    }
                                ]
                            }
                        },
                        {
                            "range": {
                                "timestamp": {
                                    "from": time_from,
                                    "to": time_to
                                }
                            }
                        }
                    ]
                }
            },
            "_source": False,
            "aggregations": {
                "groupby": {
                    "composite": {
                        "size": 9999,
                        "sources": [
                            {
                                "src_host": {
                                    "terms": {
                                        "field": "src_host",
                                        "missing_bucket": True,
                                        "order": "asc"
                                    }
                                }
                            },
                            {
                                "dest_host": {
                                    "terms": {
                                        "field": "dest_host",
                                        "missing_bucket": True,
                                        "order": "asc"
                                    }
                                }
                            }
                        ]
                    },
                    "aggregations": {
                        field: {
                            "avg": {
                                "field": field
                            }
                        }
                    }
                }
            }
        }

        results = hp.es.search(index=idx, body=query)
        return [
            {'src_host': item['key']['src_host'],
             'dest_host': item['key']['dest_host'],
             field: item[field]['value']}
            for item in results["aggregations"]["groupby"]["buckets"]
        ]

    # NOTE(review): the body of ``if len(output) > 0`` was missing from the
    # original text; extending ``data`` with the rows is the assumed intent.
    data = []
    for k, v in hp.hosts.items():
        if host == v:
            # Extending with an empty result is a no-op, so no length check.
            data.extend(runQuery(k))

    return data
Ejemplo n.º 8
def queryDailyAvg(idx, fld, dateFrom, dateTo):
    """Return the daily average value for every distinct value of ``fld``.

    Only documents with ``timestamp`` in ``[dateFrom, dateTo]`` and with both
    ``src_production`` and ``dest_production`` set to true are considered.

    Args:
        idx: Name of the index to search.
        fld: Field whose distinct values form the outer grouping.
        dateFrom: Inclusive lower bound for ``timestamp``.
        dateTo: Inclusive upper bound for ``timestamp``.

    Returns:
        dict: ``{<fld value>: {<day bucket key>: <average>}}``.
    """
    val_fld = hp.getValueField(idx)
    query = {
        "size": 0,
        "query": {
            "bool": {
                "must": [
                    {
                        "range": {
                            "timestamp": {
                                "gte": dateFrom,
                                "lte": dateTo
                            }
                        }
                    },
                    {
                        "term": {
                            "src_production": True
                        }
                    },
                    {
                        "term": {
                            "dest_production": True
                        }
                    }
                ]
            }
        },
        "aggs": {
            "avg_values": {
                "terms": {
                    "field": fld,
                    "size": 9999
                },
                "aggs": {
                    "period": {
                        "date_histogram": {
                            "field": "timestamp",
                            "calendar_interval": "day"
                        },
                        "aggs": {
                            val_fld: {
                                "avg": {
                                    "field": val_fld
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    data = hp.es.search(index=idx, body=query)

    # Outer key: the terms bucket; inner key: the day bucket.
    return {
        bucket['key']: {
            period['key']: period[val_fld]['value']
            for period in bucket['period']['buckets']
        }
        for bucket in data['aggregations']['avg_values']['buckets']
    }
Ejemplo n.º 9
def query4Avg(idx, dateFrom, dateTo):
    """Return the average value of every (src, dest) pair in a time window.

    Only documents with ``dateFrom < timestamp <= dateTo`` and with both
    ``src_production`` and ``dest_production`` set to true are considered.

    Args:
        idx: Name of the index to search.
        dateFrom: Exclusive lower bound for ``timestamp``.
        dateTo: Inclusive upper bound for ``timestamp``.

    Returns:
        list: Dicts with the keys ``'hash'`` (``"<src>-<dest>"``), ``'src'``,
        ``'dest'``, ``'value'``, ``'from'``, ``'to'`` and ``'doc_count'``, one
        per composite bucket.
    """
    val_fld = hp.getValueField(idx)
    query = {
        "size": 0,
        "query": {
            "bool": {
                "must": [
                    {
                        "range": {
                            "timestamp": {
                                "gt": dateFrom,
                                "lte": dateTo
                            }
                        }
                    },
                    {
                        "term": {
                            "src_production": True
                        }
                    },
                    {
                        "term": {
                            "dest_production": True
                        }
                    }
                ]
            }
        },
        "aggregations": {
            "groupby": {
                "composite": {
                    "size": 9999,
                    "sources": [
                        {
                            "src": {
                                "terms": {
                                    "field": "src"
                                }
                            }
                        },
                        {
                            "dest": {
                                "terms": {
                                    "field": "dest"
                                }
                            }
                        }
                    ]
                },
                "aggs": {
                    val_fld: {
                        "avg": {
                            "field": val_fld
                        }
                    }
                }
            }
        }
    }

    aggdata = hp.es.search(index=idx, body=query)
    return [
        {
            'hash': str(item['key']['src'] + '-' + item['key']['dest']),
            'src': item['key']['src'],
            'dest': item['key']['dest'],
            'value': item[val_fld]['value'],
            'from': dateFrom,
            'to': dateTo,
            'doc_count': item['doc_count'],
        }
        for item in aggdata['aggregations']['groupby']['buckets']
    ]
Ejemplo n.º 10
def queryAllValues(idx, src, dest, period):
    """Scroll through all production measurements between ``src`` and ``dest``.

    Matches documents with ``timestamp`` inside ``[period[0], period[1]]``,
    the given ``src`` and ``dest``, and both ``src_production`` and
    ``dest_production`` set to true. Only ``timestamp`` and the value field
    of ``idx`` are requested, sorted by ascending ``timestamp``.

    Args:
        idx: Name of the index to scan.
        src: Value the ``src`` field must equal.
        dest: Value the ``dest`` field must equal.
        period: Two-item sequence with the inclusive lower and upper
            ``timestamp`` bounds.

    Returns:
        list: One entry per hit returned by the scan.
    """
    val_fld = hp.getValueField(idx)
    query = {
        "size": 0,
        "_source": ["timestamp", val_fld],
        "sort": [
            {
                "timestamp": {
                    "order": "asc"
                }
            }
        ],
        "query": {
            "bool": {
                "must": [
                    {
                        "range": {
                            "timestamp": {
                                "gte": period[0],
                                "lte": period[1]
                            }
                        }
                    },
                    {
                        "term": {
                            "src": {
                                "value": src
                            }
                        }
                    },
                    {
                        "term": {
                            "dest": {
                                "value": dest
                            }
                        }
                    },
                    {
                        "term": {
                            "src_production": True
                        }
                    },
                    {
                        "term": {
                            "dest_production": True
                        }
                    }
                ]
            }
        }
    }

    data = scan(client=hp.es, index=idx, query=query)

    # NOTE(review): the loop body was missing from the original text and
    # ``allData`` was never defined; collecting each hit's ``_source`` is the
    # assumed intent - confirm against callers.
    allData = []
    for count, res in enumerate(data):
        # Progress indicator for long scans.
        if not count % 100000:
            print(count)
        allData.append(res['_source'])

    return allData
Ejemplo n.º 11
def AggBySrcDestIP(idx, time_from, time_to):
    """Return the average value per (src_host, ipv6, dest_host) combination.

    Args:
        idx: Name of the index to search.
        time_from: Lower ``timestamp`` bound (sent as ``from``).
        time_to: Upper ``timestamp`` bound (sent as ``to``).

    Returns:
        list: Dicts with the keys ``'dest_host'``, ``'src_host'``, ``'ipv6'``,
        the value-field name (holding the average) and ``'num_tests'``
        (the bucket's document count), one per composite bucket.
    """
    val_fld = hp.getValueField(idx)
    query = {
        "size": 0,
        "_source": False,
        "query": {
            "range": {
                "timestamp": {
                    "from": time_from,
                    "to": time_to
                }
            }
        },
        "aggregations": {
            "groupby": {
                "composite": {
                    "size": 10000,
                    "sources": [
                        {
                            "src_host": {
                                "terms": {
                                    "field": "src_host",
                                    "missing_bucket": True,
                                    "order": "asc"
                                }
                            }
                        },
                        {
                            "ipv6": {
                                "terms": {
                                    "field": "ipv6",
                                    "missing_bucket": True,
                                    "order": "asc"
                                }
                            }
                        },
                        {
                            "dest_host": {
                                "terms": {
                                    "field": "dest_host",
                                    "missing_bucket": True,
                                    "order": "asc"
                                }
                            }
                        }
                    ]
                },
                "aggregations": {
                    "mean_field": {
                        "avg": {
                            "field": val_fld
                        }
                    }
                }
            }
        }
    }

    results = hp.es.search(index=idx, body=query)

    return [
        {'dest_host': item['key']['dest_host'],
         'src_host': item['key']['src_host'],
         'ipv6': item['key']['ipv6'],
         val_fld: item['mean_field']['value'],
         'num_tests': item['doc_count']}
        for item in results["aggregations"]["groupby"]["buckets"]
    ]