Ejemplo n.º 1
0
def queryAvgValuebyHost(idx, fromDate, toDate):
    """Return per-host daily averages of the index's value field.

    The same aggregation is run twice, once grouping on ``src_host`` and once
    on ``dest_host``.

    Args:
        idx: Name of the index to search; also used to look up the value
            field through ``hp.getValueField``.
        fromDate: Inclusive lower bound for ``timestamp`` (``gte``).
        toDate: Inclusive upper bound for ``timestamp`` (``lte``).

    Returns:
        A dict with the keys ``'src_host'`` and ``'dest_host'``. Each maps to
        a list of ``{'host': ..., 'period': ..., <value field>: ...}`` rows,
        one per host bucket and daily histogram bucket.
    """
    val_fld = hp.getValueField(idx)

    # Fragments shared by both searches; only the grouping field changes.
    time_filter = {"range": {"timestamp": {"gte": fromDate, "lte": toDate}}}
    daily_average = {
        "period": {
            "date_histogram": {
                "field": "timestamp",
                "calendar_interval": "day",
            },
            "aggs": {val_fld: {"avg": {"field": val_fld}}},
        }
    }

    def search_grouped_by(host_field):
        body = {
            "size": 0,  # only aggregation buckets are read, never the hits
            "query": {"bool": {"must": [time_filter]}},
            "aggs": {
                "host": {
                    "terms": {"field": host_field, "size": 9999},
                    "aggs": daily_average,
                }
            },
        }
        return hp.es.search(index=idx, body=body)

    result = {}
    for host_field in ('src_host', 'dest_host'):
        response = search_grouped_by(host_field)
        result[host_field] = [
            {
                'host': host_bucket['key'],
                'period': day_bucket['key'],
                val_fld: day_bucket[val_fld]['value'],
            }
            for host_bucket in response['aggregations']['host']['buckets']
            for day_bucket in host_bucket['period']['buckets']
        ]
    return result
Ejemplo n.º 2
0
    def getData(self, src, dest):
        """Load all values for one src/dest pair and add transformed columns.

        The period comes from ``hp.GetTimeRanges`` applied to the root
        parent's ``dateFrom``/``dateTo``. The index's value field is renamed
        to ``'value'``. When the frame has rows, two columns are added:
        ``'log_value'`` (natural log, with zeros turned into NaN first so the
        log is never taken of 0) and ``'sqrt'`` (square root of the value).

        Args:
            src: Source identifier forwarded to ``qrs.queryAllValues``.
            dest: Destination identifier forwarded to ``qrs.queryAllValues``.

        Returns:
            A ``pandas.DataFrame``; returned without the derived columns when
            it has no rows.
        """
        period = hp.GetTimeRanges(self.root_parent.dateFrom,
                                  self.root_parent.dateTo)
        records = qrs.queryAllValues(self._idx, src, dest, period)

        df = pd.DataFrame(records)
        df.rename(columns={hp.getValueField(self._idx): 'value'}, inplace=True)

        # Nothing to transform: hand back the empty frame as-is.
        if len(df) == 0:
            return df

        non_zero = df['value'].replace(0, np.nan)
        df['log_value'] = np.log(non_zero)
        df['sqrt'] = df['value'] ** (1 / 2)
        return df
Ejemplo n.º 3
0
    def getValues(self, probdf):
        """Collect the values for every (src, dest, idx) row of ``probdf``.

        For each row, the records returned by ``qrs.queryAllValues`` are put
        into a frame, tagged with ``idx``, ``hash`` (``"<src>-<dest>"``),
        ``src`` and ``dest``, and the index's value field is renamed to
        ``'value'``. All per-row frames are then concatenated once.

        Args:
            probdf: DataFrame that has at least the columns ``'src'``,
                ``'dest'`` and ``'idx'``.

        Returns:
            A ``pandas.DataFrame`` with a fresh integer index. It always has
            the columns ``timestamp``, ``value``, ``idx`` and ``hash``, plus
            any further columns present in the per-row frames.
        """
        time_list = hp.GetTimeRanges(self.dateFrom, self.dateTo)

        # Seed with an empty, labelled frame so the base columns (and their
        # order) are present even when ``probdf`` has no rows.
        frames = [pd.DataFrame(columns=['timestamp', 'value', 'idx', 'hash'])]

        for item in probdf[['src', 'dest', 'idx']].values:
            src, dest, idx = item[0], item[1], item[2]
            # NOTE(review): the whole row is passed as the second argument,
            # followed by the two period bounds; kept exactly as in the
            # original call - confirm against qrs.queryAllValues' signature.
            tempdf = pd.DataFrame(
                qrs.queryAllValues(idx, item, time_list[0], time_list[1]))
            tempdf['idx'] = idx
            tempdf['hash'] = src + "-" + dest
            tempdf['src'] = src
            tempdf['dest'] = dest
            tempdf.rename(columns={hp.getValueField(idx): 'value'},
                          inplace=True)
            frames.append(tempdf)

        # DataFrame.append was removed in pandas 2.0 and copied the whole
        # accumulated frame on every iteration; one concat does the same job.
        return pd.concat(frames, ignore_index=True)
Ejemplo n.º 4
0
def queryAllValuesFromList(idx, fld_type, val_list, period):
    """Scan all documents whose ``fld_type`` value is in ``val_list``.

    Args:
        idx: Index to scan; also used to look up the value field through
            ``hp.getValueField``.
        fld_type: Name of the document field matched by the ``terms`` filter.
        val_list: Accepted values for ``fld_type``.
        period: Two-item sequence; ``period[0]`` and ``period[1]`` are the
            inclusive ``timestamp`` bounds.

    Returns:
        A list with the ``_source`` dict of every document yielded by the
        scan.

    Side effects:
        Prints the running document count at 0 and at every multiple of
        100000.
    """
    val_fld = hp.getValueField(idx)
    wanted_fields = ["timestamp", "dest", "src", val_fld]

    within_period = {
        "range": {"timestamp": {"gte": period[0], "lte": period[1]}}
    }
    in_value_list = {"terms": {fld_type: val_list}}
    query = {
        "size": 0,
        "_source": {"includes": wanted_fields},
        "query": {"bool": {"must": [within_period, in_value_list]}},
    }

    # NOTE(review): this uses a bare ``es`` client rather than ``hp.es``;
    # kept as in the original - confirm ``es`` is defined at module level.
    hits = scan(
        client=es,
        index=idx,
        query=query,
        _source=list(wanted_fields),
        filter_path=['_scroll_id', '_shards', 'hits.hits._source'],
    )

    allData = []
    for count, hit in enumerate(hits):
        if count % 100000 == 0:
            print(count)  # coarse progress indicator for long scans
        allData.append(hit['_source'])

    return allData
Ejemplo n.º 5
0
def PairAverageValuesQuery(idx, time_from, time_to, args):
    """Return 30-minute average values for a source/destination host pair.

    ``args[0]`` and ``args[1]`` are compared against the *values* of
    ``hp.hosts``. Every key whose value equals the source (respectively the
    destination) is collected, and one search is run for each
    (source key, destination key) combination. This covers the case where an
    IP was replaced by its host name at an earlier step, so several stored
    values may correspond to the same host.

    Args:
        idx: Index to search; also used to look up the value field through
            ``hp.getValueField``.
        time_from: Lower ``timestamp`` bound (``from``).
        time_to: Upper ``timestamp`` bound (``to``).
        args: Sequence whose first two items are the source and destination.

    Returns:
        A list of ``{'ipv6': ..., 'ts': ..., <value field>: ...,
        'doc_count': ...}`` dicts, one per composite bucket and 30-minute
        histogram bucket, accumulated over all combinations.

    Side effects:
        Prints the list of combinations that are queried.
    """
    src, dest = args[0], args[1]
    field = hp.getValueField(idx)

    def exact_match(fld, value):
        return {"term": {fld: {"value": value}}}

    def group_source(fld):
        return {
            fld: {
                "terms": {
                    "field": fld,
                    "missing_bucket": True,
                    "order": "asc",
                }
            }
        }

    def fetch_pair(src_key, dest_key):
        body = {
            "size": 0,
            "query": {
                "bool": {
                    "must": [
                        exact_match("src_host", src_key),
                        exact_match("dest_host", dest_key),
                        {
                            "range": {
                                "timestamp": {
                                    "from": time_from,
                                    "to": time_to,
                                }
                            }
                        },
                    ]
                }
            },
            "_source": False,
            "aggregations": {
                "groupby": {
                    "composite": {
                        "size": 10000,
                        "sources": [
                            group_source("src_host"),
                            group_source("dest_host"),
                            group_source("ipv6"),
                        ],
                    },
                    "aggregations": {
                        "ts": {
                            "date_histogram": {
                                "field": "timestamp",
                                "fixed_interval": "30m",
                            },
                            "aggs": {field: {"avg": {"field": field}}},
                        }
                    },
                }
            },
        }

        response = hp.es.search(index=idx, body=body)
        return [
            {
                'ipv6': group['key']['ipv6'],
                'ts': slot['key'],
                field: slot[field]['value'],
                'doc_count': slot['doc_count'],
            }
            for group in response["aggregations"]["groupby"]["buckets"]
            for slot in group['ts']['buckets']
        ]

    # Reverse lookup: collect every key of hp.hosts that maps to the given
    # source / destination value.
    src_items = [key for key, name in hp.hosts.items() if src == name]
    dest_items = [key for key, name in hp.hosts.items() if dest == name]

    combinations = list(itertools.product(src_items, dest_items))
    print(combinations)

    data = []
    for src_key, dest_key in combinations:
        data.extend(fetch_pair(src_key, dest_key))

    return data
Ejemplo n.º 6
0
def GetPairsForAHostV1(idx, time_from, time_to, args):
    """Return the average value for every pair that involves a given host.

    Documents are selected when ``args[0]`` equals either ``src_host`` or
    ``dest_host`` and the ``timestamp`` lies in the given range. They are
    grouped by (``src_host``, ``dest_host``) with a composite aggregation.

    Args:
        idx: Index to search; also used to look up the value field through
            ``hp.getValueField``.
        time_from: Lower ``timestamp`` bound (``from``).
        time_to: Upper ``timestamp`` bound (``to``).
        args: Sequence whose first item is the host to look for.

    Returns:
        A list of ``{'src_host': ..., 'dest_host': ..., <value field>: ...}``
        dicts, one per composite bucket returned by the search.

    Side effects:
        Prints the call parameters and the query (with single quotes replaced
        by double quotes).
    """
    host = args[0]
    field = hp.getValueField(idx)
    print(host, field, idx, time_from, time_to)

    def host_on(side):
        return {"term": {side: {"value": host}}}

    def group_source(side):
        return {
            side: {
                "terms": {
                    "field": side,
                    "missing_bucket": True,
                    "order": "asc",
                }
            }
        }

    host_on_either_side = {
        "bool": {"should": [host_on("src_host"), host_on("dest_host")]}
    }
    within_window = {
        "range": {"timestamp": {"from": time_from, "to": time_to}}
    }

    # Key order is kept as in the original because the dict is printed below.
    query = {
        "size": 0,
        "query": {"bool": {"must": [host_on_either_side, within_window]}},
        "_source": False,
        "aggregations": {
            "groupby": {
                "composite": {
                    "size": 9999,
                    "sources": [
                        group_source("src_host"),
                        group_source("dest_host"),
                    ],
                },
                "aggregations": {field: {"avg": {"field": field}}},
            }
        },
    }

    print(str(query).replace("'", '"'))
    response = hp.es.search(index=idx, body=query)

    return [
        {
            'src_host': bucket['key']['src_host'],
            'dest_host': bucket['key']['dest_host'],
            field: bucket[field]['value'],
        }
        for bucket in response["aggregations"]["groupby"]["buckets"]
    ]
Ejemplo n.º 7
0
def GetPairsForAHost(idx, time_from, time_to, args):
    """Return the average value for every pair that involves a given host.

    ``args[0]`` is compared against the *values* of ``hp.hosts``. For every
    key whose value equals it, a search is run with that key as the host,
    matching it against ``src_host`` or ``dest_host`` and grouping the
    documents by (``src_host``, ``dest_host``).

    Args:
        idx: Index to search; also used to look up the value field through
            ``hp.getValueField``.
        time_from: Lower ``timestamp`` bound (``from``).
        time_to: Upper ``timestamp`` bound (``to``).
        args: Sequence whose first item is the host to look for.

    Returns:
        A list of ``{'src_host': ..., 'dest_host': ..., <value field>: ...}``
        dicts accumulated over all matching keys of ``hp.hosts``.

    Side effects:
        Prints the call parameters.
    """
    host = args[0]
    field = hp.getValueField(idx)
    print(host, field, idx, time_from, time_to)

    def group_source(side):
        return {
            side: {
                "terms": {
                    "field": side,
                    "missing_bucket": True,
                    "order": "asc",
                }
            }
        }

    def pairs_for(host_key):
        on_either_side = {
            "bool": {
                "should": [
                    {"term": {"src_host": {"value": host_key}}},
                    {"term": {"dest_host": {"value": host_key}}},
                ]
            }
        }
        within_window = {
            "range": {"timestamp": {"from": time_from, "to": time_to}}
        }
        body = {
            "size": 0,
            "query": {"bool": {"must": [on_either_side, within_window]}},
            "_source": False,
            "aggregations": {
                "groupby": {
                    "composite": {
                        "size": 9999,
                        "sources": [
                            group_source("src_host"),
                            group_source("dest_host"),
                        ],
                    },
                    "aggregations": {field: {"avg": {"field": field}}},
                }
            },
        }

        response = hp.es.search(index=idx, body=body)
        return [
            {
                'src_host': bucket['key']['src_host'],
                'dest_host': bucket['key']['dest_host'],
                field: bucket[field]['value'],
            }
            for bucket in response["aggregations"]["groupby"]["buckets"]
        ]

    data = []
    for host_key, name in hp.hosts.items():
        if host == name:
            data.extend(pairs_for(host_key))

    return data
Ejemplo n.º 8
0
def queryDailyAvg(idx, fld, dateFrom, dateTo):
    """Return daily averages of the index's value field, grouped by ``fld``.

    Only documents with ``src_production`` and ``dest_production`` both true
    and a ``timestamp`` within the inclusive range are considered.

    Args:
        idx: Index to search; also used to look up the value field through
            ``hp.getValueField``.
        fld: Document field used for the ``terms`` grouping.
        dateFrom: Inclusive lower ``timestamp`` bound (``gte``).
        dateTo: Inclusive upper ``timestamp`` bound (``lte``).

    Returns:
        A dict mapping each ``fld`` bucket key to a dict of
        ``{daily bucket key: average value}``.
    """
    val_fld = hp.getValueField(idx)

    filters = [
        {"range": {"timestamp": {"gte": dateFrom, "lte": dateTo}}},
        {"term": {"src_production": True}},
        {"term": {"dest_production": True}},
    ]
    per_day = {
        "period": {
            "date_histogram": {
                "field": "timestamp",
                "calendar_interval": "day",
            },
            "aggs": {val_fld: {"avg": {"field": val_fld}}},
        }
    }
    query = {
        "size": 0,  # only aggregation buckets are read, never the hits
        "query": {"bool": {"must": filters}},
        "aggs": {
            "avg_values": {
                "terms": {"field": fld, "size": 9999},
                "aggs": per_day,
            }
        },
    }

    response = hp.es.search(index=idx, body=query)

    return {
        group['key']: {
            day['key']: day[val_fld]['value']
            for day in group['period']['buckets']
        }
        for group in response['aggregations']['avg_values']['buckets']
    }
Ejemplo n.º 9
0
def query4Avg(idx, dateFrom, dateTo):
    """Return the average value for every src/dest pair in a time window.

    Only documents with ``src_production`` and ``dest_production`` both true
    are considered. The window excludes ``dateFrom`` (``gt``) and includes
    ``dateTo`` (``lte``).

    The pairs come from a composite aggregation, which returns at most
    ``size`` buckets per request. The request is therefore repeated with the
    returned ``after_key`` until a short page signals the end, so results are
    not silently truncated when there are more pairs than one page holds.

    Args:
        idx: Index to search; also used to look up the value field through
            ``hp.getValueField``.
        dateFrom: Exclusive lower ``timestamp`` bound.
        dateTo: Inclusive upper ``timestamp`` bound.

    Returns:
        A list of dicts with the keys ``hash`` (``"<src>-<dest>"``), ``src``,
        ``dest``, ``value`` (the average), ``from``, ``to`` and
        ``doc_count``, one per src/dest bucket.
    """
    val_fld = hp.getValueField(idx)
    page_size = 9999

    composite = {
        "size": page_size,
        "sources": [
            {"src": {"terms": {"field": "src"}}},
            {"dest": {"terms": {"field": "dest"}}},
        ],
    }
    query = {
        "size": 0,
        "query": {
            "bool": {
                "must": [
                    {"range": {"timestamp": {"gt": dateFrom, "lte": dateTo}}},
                    {"term": {"src_production": True}},
                    {"term": {"dest_production": True}},
                ]
            }
        },
        "aggregations": {
            "groupby": {
                "composite": composite,
                "aggs": {val_fld: {"avg": {"field": val_fld}}},
            }
        },
    }

    aggrs = []
    while True:
        aggdata = hp.es.search(index=idx, body=query)
        groupby = aggdata['aggregations']['groupby']
        buckets = groupby['buckets']

        for item in buckets:
            key = item['key']
            aggrs.append({'hash': str(key['src'] + '-' + key['dest']),
                          'src': key['src'], 'dest': key['dest'],
                          'value': item[val_fld]['value'],
                          'from': dateFrom,
                          'to': dateTo,
                          'doc_count': item['doc_count']
                          })

        # A page shorter than the requested size is the last one; stopping
        # here also means a result that fits in one page costs one request,
        # exactly as before.
        after_key = groupby.get('after_key')
        if len(buckets) < page_size or after_key is None:
            break
        composite['after'] = after_key

    return aggrs
Ejemplo n.º 10
0
def queryAllValues(idx, src, dest, period):
    """Scan all values for one src/dest pair, ordered by timestamp.

    Only documents with ``src_production`` and ``dest_production`` both true
    and a ``timestamp`` inside the inclusive period are returned.

    Args:
        idx: Index to scan; also used to look up the value field through
            ``hp.getValueField``.
        src: Exact value matched against the ``src`` field.
        dest: Exact value matched against the ``dest`` field.
        period: Two-item sequence; ``period[0]`` and ``period[1]`` are the
            inclusive ``timestamp`` bounds.

    Returns:
        A list with the ``_source`` dict (``timestamp`` and the value field)
        of every matching document.

    Side effects:
        Prints the running document count at 0 and at every multiple of
        100000.
    """
    val_fld = hp.getValueField(idx)
    query = {
            "size": 0,
            "_source": ["timestamp", val_fld],
            "sort": [
                {
                  "timestamp": {
                    "order": "asc"
                  }
                }
            ],
            "query": {
                "bool": {
                    "must": [
                        {
                            "range": {
                                "timestamp": {
                                    "gte": period[0],
                                    "lte": period[1]
                                }
                            }
                        },
                        {"term": {"src": {"value": src}}},
                        {"term": {"dest": {"value": dest}}},
                        {"term": {"src_production": True}},
                        {"term": {"dest_production": True}}
                    ]
                }
            }
            }

    # helpers.scan replaces the query's sort with "_doc" unless
    # preserve_order=True, which would silently drop the timestamp ordering
    # requested above. The search is restricted to one src/dest pair, which
    # keeps the cost of the ordered scroll bounded.
    data = scan(client=hp.es, index=idx, query=query, preserve_order=True)

    allData = []
    for count, res in enumerate(data):
        if not count % 100000:
            print(count)  # coarse progress indicator for long scans
        allData.append(res['_source'])

    return allData
Ejemplo n.º 11
0
def AggBySrcDestIP(idx, time_from, time_to):
    """Return the average value per (src_host, ipv6, dest_host) group.

    All documents with a ``timestamp`` in the given range are grouped with a
    composite aggregation. A composite aggregation returns at most ``size``
    buckets per request, so the request is repeated with the returned
    ``after_key`` until a short page signals the end. Results are therefore
    not silently cut off when there are more groups than one page holds.

    Args:
        idx: Index to search; also used to look up the value field through
            ``hp.getValueField``.
        time_from: Lower ``timestamp`` bound (``from``).
        time_to: Upper ``timestamp`` bound (``to``).

    Returns:
        A list of dicts with the keys ``dest_host``, ``src_host``, ``ipv6``,
        the value field name (holding the average) and ``num_tests`` (the
        bucket's document count). Because ``missing_bucket`` is enabled, the
        host and ``ipv6`` keys may be ``None``.
    """
    val_fld = hp.getValueField(idx)
    page_size = 10000

    def group_source(fld):
        return {
            fld: {
                "terms": {
                    "field": fld,
                    "missing_bucket": True,
                    "order": "asc",
                }
            }
        }

    composite = {
        "size": page_size,
        "sources": [
            group_source("src_host"),
            group_source("ipv6"),
            group_source("dest_host"),
        ],
    }
    query = {
        "size": 0,
        "_source": False,
        "query": {
            "range": {"timestamp": {"from": time_from, "to": time_to}}
        },
        "aggregations": {
            "groupby": {
                "composite": composite,
                "aggregations": {
                    "mean_field": {"avg": {"field": val_fld}}
                },
            }
        },
    }

    data = []
    while True:
        results = hp.es.search(index=idx, body=query)
        groupby = results["aggregations"]["groupby"]
        buckets = groupby["buckets"]

        for item in buckets:
            key = item['key']
            data.append({'dest_host': key['dest_host'],
                         'src_host': key['src_host'],
                         'ipv6': key['ipv6'],
                         val_fld: item['mean_field']['value'],
                         'num_tests': item['doc_count']})

        # A page shorter than the requested size is the last one; stopping
        # here also means a result that fits in one page costs one request,
        # exactly as before.
        after_key = groupby.get("after_key")
        if len(buckets) < page_size or after_key is None:
            break
        composite["after"] = after_key

    return data