def get_datetime_list(esorm, deviceID, must_not_list, must_list, startTimestamp, endTimestamp):
    """Build the list of daily timestamps between startTimestamp and endTimestamp,
    replacing the last entry with the latest timestamp actually present in ES."""
    filter_list = [{
        "range": {
            "timestamp": {
                "gte": startTimestamp * 1000,
                "lte": endTimestamp * 1000
            }
        }
    }]
    max_t = int(
        esorm.field_aggs("max", "timestamp", deviceID, filter_list,
                         must_not_list, must_list) / 1000)
    d1 = simple_datetime(startTimestamp, str, True)
    d2 = simple_datetime(endTimestamp, str, True)
    d3 = simple_datetime(max_t, str, True)
    # pandas.date_range yields "YYYY-MM-DD HH:MM:SS" strings; switch to the "T"
    # separator used by the stored timestamps, and drop the first day.
    datetime_list = [
        i.replace(" ", "T")
        for i in date_range(d1, d2, normalize=True).astype(str).values.tolist()[1:]
    ]
    datetime_list[-1] = d3
    return datetime_list
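
# Hypothetical usage sketch: the device ID and the ORM instance are placeholders,
# not values defined in this module. It assumes an ORM exposing the
# field_aggs(mode, field, deviceID, filter_list, must_not_list, must_list)
# signature used above, and Unix-second bounds spanning three days.
def _example_get_datetime_list(esorm):
    # The first day is dropped by the [1:] slice above, and the last entry is
    # replaced by the newest timestamp found in ES.
    return get_datetime_list(esorm, "dev-001", must_not_list=[], must_list=[],
                             startTimestamp=1609459200, endTimestamp=1609718400)
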
def get_ars_forensics(pk, deviceID, ID, timestamp, user, pageSize, **kwargs):
    start = simple_datetime(timestamp, str, True)
    size = pageSize
    if pk:
        ids = [{
            '_id': get_doc_id(start, deviceID, i, user),
            '_index': 'ars_scores',
            '_type': '_doc'
        } for i in config.ARS_FORENSICS[pk]]
        res = get_logs_with_ids(ids)
        for scores_dict in res:
            if scores_dict:
                merge_topn(scores_dict, deviceID, pk, timestamp, size)
        data = {pk: res}
    else:
        data = {
            "ars": {
                "network": get_network_forensics(deviceID, ID, timestamp, user,
                                                 pageSize, **kwargs)['network'],
                "protocols": get_protocols_forensics(deviceID, ID, timestamp, user,
                                                     pageSize, **kwargs)['protocols']
            }
        }
    return data
def get_query(self):
    _query = {
        # "sort": {"@timestamp": {"order": "desc"}},
        "query": {
            "bool": {
                "must": [
                    {"term": {"local_orig": {"value": "false"}}},
                    {"term": {"local_resp": {"value": "true"}}},
                    {"term": {"proto.keyword": {"value": "tcp"}}},
                    {"term": {"deviceID.keyword": self.deviceID}},
                    {"terms": {"conn_state.keyword": ["S1", "S2", "S3", "SF", "RSTO", "RSTR"]}}
                ],
                "must_not": [
                    {"terms": {"resp_h": self.white_list}},
                    {"terms": {"resp_p": self.ignore_ports}},
                ],
                "should": [],
                "filter": [
                    {"range": {"resp_p": {"lt": 10000}}},
                    {
                        "range": {
                            "@timestamp": {
                                "gte": simple_datetime(self.start_time, str, True),
                                "lt": simple_datetime(self.end_time, str, True)
                            }
                        }
                    }
                ]
            }
        },
        "size": 0,
        "aggs": {
            "three_features": {
                "composite": {
                    "size": self.config.MAX_AGGS,
                    "sources": [
                        {"userId": {"terms": {"field": "userId.keyword"}}},
                        {"dip": {"terms": {"field": "orig_h"}}},
                        {"dpt": {"terms": {"field": "resp_p"}}}
                    ]
                }
            }
        }
    }
    return _query
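
# The composite aggregation above returns at most MAX_AGGS buckets per request;
# a caller would normally page through the rest with the "after" key. This is a
# minimal sketch only: `es_client`, `index`, and `builder` are assumptions, not
# names defined in this module.
def _iter_three_features(es_client, index, builder):
    query = builder.get_query()
    while True:
        resp = es_client.search(index=index, body=query)
        agg = resp["aggregations"]["three_features"]
        for bucket in agg["buckets"]:
            yield bucket["key"]  # {"userId": ..., "dip": ..., "dpt": ...}
        after = agg.get("after_key")
        if not after:
            break
        # Resume the composite aggregation from the last returned bucket.
        query["aggs"]["three_features"]["composite"]["after"] = after
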
def get_query(self):
    _query = {
        # "sort": {"@timestamp": {"order": "desc"}},
        "query": {
            "bool": {
                "must": [
                    {"term": {"local_orig": {"value": "false"}}},
                    {"term": {"local_resp": {"value": "true"}}},
                    {"term": {"proto.keyword": {"value": "tcp"}}},
                    {"term": {"deviceID.keyword": self.deviceID}},
                    {"terms": {"conn_state.keyword": ["S1", "S2", "S3", "SF", "RSTO", "RSTR"]}}
                ],
                "must_not": [
                    {"terms": {"resp_h": self.white_list}},
                    {"terms": {"resp_p": self.ignore_ports}},
                ],
                "should": [],
                "filter": [{
                    "range": {
                        "@timestamp": {
                            "gte": simple_datetime(self.start_time, str, True),
                            "lt": simple_datetime(self.end_time, str, True)
                        }
                    }
                }]
            }
        },
        "size": 0,
        "aggs": {
            "ipterm": {
                "terms": {
                    "field": "userId.keyword",
                    "min_doc_count": self.at_least,
                    "size": self.config.MAX_AGGS
                },
                "aggs": {
                    "dipterm": {
                        "terms": {
                            "field": "resp_p",
                            "size": self.config.MAX_AGGS
                        },
                        "aggs": {
                            "orig_h#cnt": {
                                "cardinality": {"field": "orig_h"}
                            },
                            "scores_selector": {
                                "bucket_selector": {
                                    "buckets_path": {
                                        "doc_count": "_count",
                                        "set_port": "_key",
                                        "orig_h_cnt": "orig_h#cnt"
                                    },
                                    "script": "(params.doc_count>10 && [4786, 161, 162, 1433, 3306, 1521, 1434, 69, 111, 123, 53].contains(params.set_port)) || (params.orig_h_cnt>10)"
                                }
                            },
                            "_value": {
                                "bucket_script": {
                                    "buckets_path": {
                                        "doc_count": "_count",
                                        "set_port": "_key",
                                        "orig_h_cnt": "orig_h#cnt"
                                    },
                                    "script": "params.orig_h_cnt>10?2:(params.doc_count>10 && [4786, 161, 162, 1433, 3306, 1521, 1434, 69, 111, 123, 53].contains(params.set_port))?1:0"
                                }
                            }
                        }
                    },
                    "value": {
                        "max_bucket": {"buckets_path": "dipterm>_value"}
                    },
                    "scores_selector": {
                        "bucket_selector": {
                            "buckets_path": {"dipterm": "dipterm._bucket_count"},
                            "script": "params.dipterm>0"
                        }
                    },
                    "_fill#dipterm#resp_p#key": {
                        "bucket_script": {
                            "buckets_path": {},
                            "script": "0"
                        }
                    }
                }
            }
        }
    }
    return _query
def aggregate(self, aggs=None, start_datetime=None, end_datetime=None,
              deviceID=None, must_list=None, filter_list=None,
              must_not_list=None, group_id=None, time_field="@timestamp",
              time_include=("gte", "lt"), query=None, **kwargs):
    """
    Run an aggregation query; plain document search is not supported here.

    When ``query`` is None a default bool query is built from the keyword
    arguments. If ``group_id`` is ``"notset_group"`` the query is executed
    twice (once grouped by ``userId``, once by ``sourceIp``) and the two
    results are merged. Pass a prebuilt body through ``query`` to bypass the
    default query construction.

    Parameters
    ----------
    start_datetime : datetime

    Notes
    -----
    Sorting has no effect when ``group_id`` is empty.
    """

    def _search(query=None, **kwargs):
        res = self.es_client.search(index=self.index, body=query, **kwargs)
        return res

    if query is None:
        must_list = must_list or []
        filter_list = filter_list or []
        must_not_list = must_not_list or []
        aggs = aggs or {}
        start_time = simple_datetime(start_datetime, str, True)
        end_time = simple_datetime(end_datetime, str, True)
        query = {
            "query": {
                "bool": {
                    "must": [{"match_all": {}}] + must_list,
                    "filter": [
                        {"range": {time_field: {time_include[0]: start_time,
                                                time_include[1]: end_time}}},
                        {"term": {"deviceID.keyword": deviceID}}
                    ] + filter_list,
                    "must_not": must_not_list
                }
            },
            "from": 0,
            "size": 0,
            "aggs": aggs
        }
        if group_id == "notset_group":
            query_userId = copy.deepcopy(query)
            query_sourceIp = copy.deepcopy(query)
            # Swap the grouping field from userId.keyword to sourceIp for the
            # second pass.
            query_sourceIp_str = json.dumps(query_sourceIp).replace(
                "userId.keyword", "sourceIp")
            query_sourceIp = json.loads(query_sourceIp_str)
            query_userId["query"]["bool"]["must"] += [{"exists": {"field": "userId"}}]
            res_userId = _search(query_userId, **kwargs)
            query_sourceIp["query"]["bool"]["must_not"] += [{"exists": {"field": "userId"}}]
            query_sourceIp["query"]["bool"]["must_not"].remove(
                query_sourceIp["query"]["bool"]["must_not"][-2])
            res_sourceIp = _search(query_sourceIp, **kwargs)
            res = concat_dict(res_userId, res_sourceIp)
        else:
            res = _search(query, **kwargs)
    else:
        res = _search(query, **kwargs)
    return res
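
# Hypothetical usage sketch for aggregate(): the ORM instance, device ID, and
# time bounds are placeholders. It requests a simple terms aggregation over
# userId.keyword and relies on the default bool query built above. The bounds
# are Unix seconds, mirroring how timestamps are fed to simple_datetime
# elsewhere in this module.
def _example_aggregate(orm):
    aggs = {"users": {"terms": {"field": "userId.keyword", "size": 100}}}
    res = orm.aggregate(aggs=aggs,
                        start_datetime=1609459200,
                        end_datetime=1609545600,
                        deviceID="dev-001")
    return res["aggregations"]["users"]["buckets"]
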
def search(self, use_scan=False, start_datetime=None, end_datetime=None,
           deviceID=None, must_list=None, filter_list=None, must_not_list=None,
           group_id=None, source=None, use_from=None, use_sort=None,
           time_field="@timestamp", time_include=("gte", "lt"), query=None,
           **kwargs):
    """
    Search or scan documents.

    When ``use_scan`` is True the scroll helper ``scan`` is used and a
    generator is returned; otherwise the raw search response is returned.
    When ``query`` is None a default bool query is built from the keyword
    arguments; pass a prebuilt body through ``query`` to bypass it.
    """

    def _search(use_scan=False, query=None, **kwargs):
        if use_scan:
            res = scan(client=self.es_client, index=self.index, query=query, **kwargs)
        else:
            res = self.es_client.search(index=self.index, body=query, **kwargs)
        return res

    if query is None:
        must_list = must_list or []
        filter_list = filter_list or []
        must_not_list = must_not_list or []
        start_time = simple_datetime(start_datetime, str, True)
        end_time = simple_datetime(end_datetime, str, True)
        query = {
            "query": {
                "bool": {
                    "must": [{"match_all": {}}] + must_list,
                    "filter": [
                        {"range": {time_field: {time_include[0]: start_time,
                                                time_include[1]: end_time}}},
                        {"term": {"deviceID.keyword": deviceID}}
                    ] + filter_list,
                    "must_not": must_not_list
                }
            }
        }
        if source:
            query["_source"] = source
        if use_from:
            query["from"] = use_from
        if use_sort:
            query["sort"] = use_sort
    return _search(use_scan, query, **kwargs)
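
# Hypothetical usage sketch for search(): the ORM instance, device ID, and
# Unix-second time bounds are placeholders. With use_scan=True the result is a
# generator from elasticsearch.helpers.scan, so it is iterated rather than
# indexed into.
def _example_scan(orm):
    hits = orm.search(use_scan=True,
                      start_datetime=1609459200,
                      end_datetime=1609545600,
                      deviceID="dev-001",
                      source=["userId", "@timestamp"])
    return [h["_source"] for h in hits]
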
def get_risk_level(deviceID=None, xrs=None, field="timestamp"):
    """Assemble risk-level and per-engine score payloads from the latest
    *_results documents for the given device."""
    ers_es = ERSResultsORM()
    mrs_es = MRSResultsORM()
    ars_es = ARSResultsORM()
    eas_es = EASResultsORM()
    es = ElasticsearchORM(
        index=['ars_results', 'mrs_results', 'ers_results', 'eas_results'])
    xrs_timestamp = int(
        es.field_aggs(mode='max', field=field, deviceID=deviceID)) / 1000  # max time in es
    # Use in the next version, once the MRS struct is updated:
    # query = {
    #     "_source": ["username", "scores", "policyID"],
    #     "query": {
    #         "bool": {
    #             "must": [{"term": {"deviceID.keyword": deviceID}}],
    #             "must_not": [{"term": {"scores": 0}}],
    #             "filter": [{"term": {"timestamp": simple_datetime(xrs_timestamp, str, True)}}]
    #         }
    #     }
    # }
    # test = es.search(True, query=query)
    # tf = pd.DataFrame(test)
    if simple_datetime(None, int) - xrs_timestamp > 24 * 60 * 60:
        raise APIBaseError('No scores in {}.'.format(xrs_timestamp))
    # xrs_timestamp = 1608775200

    # @cache_value
    def get_df(xrs, deviceID, value, es):
        query = {
            "_source": ["username", "scores", "policyID"],
            "query": {
                "bool": {
                    "must": [{"term": {"deviceID.keyword": deviceID}}],
                    "must_not": [{"term": {"scores": 0}}],
                    "filter": [{"term": {"timestamp": value}}]
                }
            }
        }
        xrs_scores = es.search(True, query=query)
        df = pd.DataFrame(xrs_scores)
        size = df.index.size
        logger.info("xrs:{} size:{}, if size is 0, pass error.".format(xrs, size))
        if size != 0:
            df = pd.DataFrame(df.loc[:, "_source"].values.tolist())
            try:
                df = df.set_index(["username", "policyID"]).unstack(level=1)
            except ValueError:
                df_cp = df.set_index(["username", "policyID"])
                mult_index = df_cp.index.values.tolist()
                cnt = Counter(mult_index)
                duplicates = [key for key in cnt.keys() if cnt[key] > 1]
                logger.error(
                    'ValueError: Index contains duplicate entries, cannot reshape with {}'
                    .format(duplicates))
                return pd.DataFrame()
            if xrs == 'eas':
                df.columns = 'endpoint_' + df.columns.droplevel()
            else:
                df.columns = df.columns.droplevel()
            df.columns.name = None
            df.index.name = 'sip'
        return df

    def get_threat_level(model_list, fast_path=False):
        ers_models = merge_models(deviceID)
        threat_level_map = {
            i["modelID"]: i["threat_level"] for i in ers_models["ersModels"]
        }
        threat_level_list = [threat_level_map.get(i, 4) for i in model_list]
        if fast_path:
            return threat_level_list
        threat_weight_list = [
            config.threat_level_mapping.get(i) for i in threat_level_list
        ]
        return threat_weight_list

    @cache_value
    def get_xrs_df(xrs):
        df_xrs = pd.DataFrame({
            "sip": [],
            xrs + "_scores": []
        }).set_index("sip")
        df_xrs_list = []
        value = simple_datetime(xrs_timestamp, str, True)
        if xrs == 'mrs':
            try:
                xrs_scores = mrs_es.match_obj_or_404(field=field, value=value,
                                                     deviceID=deviceID)
            except APIDataNotFound:
                return df_xrs
            except Exception:
                logger.exception('Unknown error!')
                return df_xrs
            for i in xrs_scores:
                # 'scores' is stored as a stringified mapping of sip -> score;
                # eval() reconstructs it before reshaping.
                df = pd.DataFrame(index=[i['_source']['policyID']],
                                  data=eval(i['_source']['scores']))
                df = df.T
                df.index.name = 'sip'
                df_xrs_list.append(df)
        else:
            if xrs == 'ars':
                df = get_df(xrs, deviceID, value, ars_es)
                df_xrs_list.append(df)
                df = get_df('eas', deviceID, value, eas_es)
            else:
                df = get_df(xrs, deviceID, value, ers_es)
            df_xrs_list.append(df)
        if df_xrs_list:
            if pd.__version__ < config.pd_old1_version:
                df_xrs = pd.concat(df_xrs_list, axis=1).fillna(0.2)
            else:
                df_xrs = pd.concat(df_xrs_list, axis=1, sort=False).fillna(0.2)
            if xrs == "ers":
                model_list = df_xrs.columns.tolist()
                threat_weight_list = get_threat_level(model_list)
                df_xrs[xrs + '_scores'] = df_xrs[model_list].mul(
                    threat_weight_list).max(axis=1)
            else:
                df_xrs[xrs + '_scores'] = df_xrs.max(axis=1)
        return df_xrs.round(1)

    @catch_except({"defaults": 0.20, "order": [], "scores": {}}, inlog=True)
    def get_riskLevel():
        df_ars = get_xrs_df('ars')
        df_mrs = get_xrs_df('mrs')
        df_ers = get_xrs_df('ers')
        df = pd.concat([
            df_ars[['ars_scores']], df_mrs[['mrs_scores']], df_ers[['ers_scores']]
        ], axis=1).fillna(0.2).replace(0, 0.2)
        if not df.empty:
            df['scores'] = df.apply(_risk_level, axis=1)
        risk_data = {"defaults": 1, "timestamp": xrs_timestamp}
        risk_data['scores'] = df[['scores']].to_dict("index")
        return risk_data

    @catch_except({"defaults": 0.20, "order": [], "scores": {}}, inlog=True)
    def get_arsScores():
        df_ars = get_xrs_df('ars')
        ars_data = {
            "defaults": 0.20,
            "timestamp": xrs_timestamp,
            "order": [],
            "feature_order": [],
            "feature_mode": "max",
            "scores": {}
        }
        un_columns = ['network', 'protocols', 'events', 'endpoint']
        ars_data['order'] = un_columns
        for col_name in un_columns:
            selector = df_ars.columns[df_ars.columns.str.contains(
                col_name)].astype(str)
            ars_data['feature_order'].append(
                [i.split('_')[-1] for i in selector.tolist()])
            if pd.__version__ < config.pd_old1_version:
                df_ars[col_name] = df_ars.apply(
                    lambda x: [x[i] for i in x.keys() if col_name in i], axis=1)
            else:
                df_ars[col_name] = df_ars[selector].apply(
                    lambda x: x.tolist(),
                    axis=1).apply(lambda x: x if isinstance(x, list) else [])
        df_ars = df_ars[un_columns]
        df_ars['scores'] = df_ars.apply(lambda x: [[i for i in x]],
                                        axis=1).apply(lambda x: x[0])
        ars_data['scores'] = df_ars[['scores']].to_dict('index')
        return ars_data

    @catch_except({"defaults": 0.20, "order": [], "scores": {}}, inlog=False)
    def get_xrsScores(xrs):
        df_xrs = get_xrs_df(xrs)
        xrs_data = {
            "defaults": 0.20,
            "timestamp": xrs_timestamp,
            "order": [],
            "scores": {}
        }
        df_xrs = df_xrs.drop([xrs + '_scores'], axis=1)
        model_list = df_xrs.columns.tolist()
        xrs_data['order'] = model_list
        if xrs == "ers":
            xrs_data['threat_level'] = get_threat_level(model_list, True)
        df_xrs['scores'] = df_xrs.apply(
            lambda x: [[i for i in x]],
            axis=1).apply(lambda x: x[0] if len(x) == 1 else x)
        xrs_data['scores'] = df_xrs[['scores']].to_dict('index')
        return xrs_data

    if xrs == 'riskLevel':
        data = get_riskLevel()
    elif xrs == 'arsScores':
        data = get_arsScores()
    elif xrs == 'ersScores' or xrs == 'mrsScores':
        data = get_xrsScores(xrs[:3])
    elif xrs is None:
        data = (get_riskLevel(), get_arsScores(), get_xrsScores('ers'),
                get_xrsScores('mrs'))
    return data
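
# Hypothetical usage sketch: "dev-001" is a placeholder device ID. Passing
# xrs='riskLevel' returns the merged risk dict; xrs=None returns a tuple of
# (riskLevel, arsScores, ersScores, mrsScores) payloads, as dispatched above.
# Note that get_risk_level raises APIBaseError when the newest score in ES is
# older than 24 hours.
def _example_get_risk_level():
    risk = get_risk_level(deviceID="dev-001", xrs='riskLevel')
    return risk["scores"]
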
def post(self, deviceID, paramID=None):
    data = defaultdict(dict)
    data_pre = request.data
    data_pre['deviceID'] = deviceID
    serializer = EASSerializer(data=data_pre)
    serializer.is_valid(raise_exception=True)
    kwargs = serializer.data
    startTimestamp = kwargs['startTimestamp']
    endTimestamp = kwargs['endTimestamp']
    size = kwargs.pop('size')
    from_size = kwargs.pop('from_size')
    must_list = kwargs.pop('must_list')
    must_not_list = [{"term": {"scores": 0}}]
    if paramID:
        must_list += [{"term": {"anomalyID.keyword": paramID}}]
        anomaly = get_anomaly(deviceID, paramID)
        data[paramID]['name'] = anomaly['name']
    else:
        paramID = "_all"
        anomalies = get_anomaly(deviceID, paramID)
        data[paramID]['name'] = {"en": "All anomaly", "zh": "全部异常"}
    esorm = AnomaliesScoresORM()
    datetime_list = get_datetime_list(esorm, deviceID, must_not_list, must_list,
                                      **kwargs)
    min_bound_date = simple_datetime(startTimestamp, str, True)[:10]
    max_bound_date = simple_datetime(endTimestamp - 3600 * 24, str, True)[:10]
    demo = {
        "hits": 0,
        "top": {
            "abnormalHits": [],
            "abnormalScores": []
        },
        "histogram": {
            "abnormalHits": []
        },
    }
    data[paramID].update(demo)
    query = {
        "size": size,
        "from": from_size,
        "_source": ["username", "scores"],
        "sort": [{"scores": {"order": "desc"}}],
        "query": {
            "bool": {
                "must": must_list,
                "filter": [
                    {"terms": {"timestamp": datetime_list}},
                    {"term": {"deviceID.keyword": deviceID}}
                ],
                "must_not": must_not_list
            }
        },
        "aggs": {
            "count_anomaly": {
                "terms": {
                    "field": "anomalyID.keyword",
                    "size": config.MAX_AGGS
                },
                "aggs": {
                    "histinfo": {
                        "date_histogram": {
                            "field": "timestamp",
                            "interval": "day",
                            "extended_bounds": {
                                "min": min_bound_date,
                                "max": max_bound_date
                            },
                            "min_doc_count": 0,
                            "format": "yyyy-MM-dd"
                        },
                        "aggs": {
                            "clone_count": {
                                "cumulative_sum": {"buckets_path": "_count"}
                            }
                        }
                    }
                }
            },
            "top_hits": {
                "terms": {
                    "field": "username.keyword",
                    "size": config.TOP_SIZA,
                    "show_term_doc_count_error": True
                }
            },
            "top_scores": {
                "terms": {
                    "field": "username.keyword",
                    "size": config.TOP_SIZA,
                    "show_term_doc_count_error": True,
                    "order": [{"max_scores": "desc"}]
                },
                "aggs": {
                    "max_scores": {"max": {"field": "scores"}}
                }
            }
        }
    }
    res = esorm.aggregate(query=query)
    if res["hits"]["total"] != 0:
        data[paramID]["hits"] = res["hits"]["total"]
        acab = res["aggregations"]["count_anomaly"]["buckets"]
        athb = res["aggregations"]["top_hits"]["buckets"]
        atsb = res["aggregations"]["top_scores"]["buckets"]
        # Element-wise sum of per-day doc counts across anomalies; numpy is
        # assumed here (the builtin sum has no axis argument).
        data[paramID]["histogram"]["abnormalHits"] = np.sum(
            [[j["doc_count"] for j in i["histinfo"]["buckets"]] for i in acab],
            axis=0).tolist()
        data[paramID]["top"]["abnormalHits"] = [
            {"username": i["key"], "hits": i["doc_count"]} for i in athb[0:size]
        ]
        data[paramID]["top"]["abnormalScores"] = [
            {"username": i["key"], "scores": i["max_scores"]["value"]}
            for i in atsb[0:size]
        ]
        if paramID == "_all":
            data[paramID]["anomalies_hits"] = [
                {"anomalyID": i["key"], "hits": i["doc_count"],
                 "name": anomalies[i["key"]]["name"]} for i in acab
            ]
    return Response(data)
def get_query(self):
    _query = {
        # "sort": {"@timestamp": {"order": "desc"}},
        "query": {
            "bool": {
                "must": [
                    {"term": {"local_orig": {"value": "false"}}},
                    {"term": {"local_resp": {"value": "true"}}},
                    {"term": {"proto.keyword": {"value": "tcp"}}},
                    {"term": {"deviceID.keyword": self.deviceID}},
                    {"terms": {"conn_state.keyword": ["S1", "S2", "S3", "SF", "RSTO", "RSTR"]}}
                ],
                "must_not": [
                    {"terms": {"resp_h": self.white_list}},
                    {"terms": {"resp_p": self.ignore_ports}},
                ],
                "should": [],
                "filter": [
                    {"range": {"resp_p": {"lt": 10000}}},
                    {
                        "range": {
                            "@timestamp": {
                                "gte": simple_datetime(self.start_time, str, True),
                                "lt": simple_datetime(self.end_time, str, True)
                            }
                        }
                    }
                ]
            }
        },
        "size": 0,
        "aggs": {
            "ipterm": {
                "terms": {
                    "field": "userId.keyword",
                    "min_doc_count": self.at_least[0],
                    "size": self.config.MAX_AGGS
                },
                "aggs": {
                    "dipterm": {
                        "terms": {
                            "field": "resp_h",
                            "size": self.config.MAX_AGGS
                        },
                        "aggs": {
                            "resp_p#cnt": {
                                "cardinality": {"field": "resp_p"}
                            },
                            "orig_p#cnt": {
                                "cardinality": {"field": "orig_p"}
                            },
                            "scores_selector": {
                                "bucket_selector": {
                                    "buckets_path": {
                                        "resp_pcnt": "resp_p#cnt",
                                        "orig_pcnt": "orig_p#cnt"
                                    },
                                    "script": "params.resp_pcnt>{} && params.orig_pcnt>{}".format(*self.at_least)
                                }
                            }
                        }
                    },
                    "scores_selector": {
                        "bucket_selector": {
                            "buckets_path": {"dipterm": "dipterm._bucket_count"},
                            "script": "params.dipterm>0"
                        }
                    },
                    "value": {
                        "bucket_script": {
                            "buckets_path": {},
                            "script": "1"
                        }
                    },
                    "_fill#dipterm#orig_h#key": {
                        "bucket_script": {
                            "buckets_path": {},
                            "script": "0"
                        }
                    },
                    "count": {
                        "sum_bucket": {"buckets_path": "dipterm._count"}
                    }
                }
            }
        }
    }
    return _query
def validate_timestamp(self, timestamp):
    time_deviation = config.TIME_DEVIATION
    t_now = simple_datetime(None, int)
    if not t_now - time_deviation < timestamp < t_now + time_deviation:
        raise APIINPUTERROR(_('Invalid timestamp.'))
    return timestamp
def get_anomaly_forensics(deviceID=None, ID=None, temp=None, xrs=None,
                          user=None, pageSize=None, timestamp=None):
    try:
        data = {}
        es_orm = AnomaliesScoresORM()
        start = simple_datetime(timestamp, str, True)
        doc_id = get_doc_id(start, deviceID, ID, user)
        res = es_orm.get_obj_or_404(doc_id=doc_id)
        if res.get("scores", 1) != 0:
            if temp is None:
                if xrs == 'eas':
                    temp = get_anomaly(ID)
                elif xrs == 'ers':
                    temp = get_ers_models(deviceID)['params'][ID]
                else:
                    raise Exception
            data[ID] = temp
            data[ID]['scores'] = res.get('scores', -1)
            from_ = (pageSize - 1) * 5
            if res['details'].get('logs'):
                size = pageSize * 5
                log_size = len(res["details"]["logs"])
                ids = res['details']['logs'][from_:size]
                res['details']['logs'] = get_logs_with_ids(ids)
                res['details']['size'] = log_size
            else:
                index = res['details'].pop('index', None)
                index_list = res['details'].pop('index_list', None)
                query = res['details'].pop('query', {})
                index = index_list or index
                if index and query != {}:
                    size = 5
                    res['details']['logs'], res['details']['size'] = get_logs_with_query(
                        index, query, from_, size)
                    # If the anomalyID has no graphs the template lookup would
                    # raise; rely on the short-circuit of the `and` chain.
                    if 'display' in temp and xrs == 'eas' and \
                            'agg_query' in temp['forensics']['graphs']['template'][0]:
                        aggs_querys = {}
                        for graph in temp['forensics']['graphs']['template']:
                            aggs_querys.update(graph['agg_query'])
                        _query = json.loads(query)
                        _query['aggs'] = aggs_querys
                        graphs_values = ElasticsearchORM(index).search(
                            query=_query)['aggregations']
                        remove_buckets(graphs_values)
                        res['graphs'] = graphs_values
            if ID in ["23787c99-4b94-4514-a38e-f753b8f47e57",
                      "c91dd8fa-af7f-11e9-a5a5-144f8a006a90"]:
                for i in res['details']['logs']:
                    if "geoip" in i:
                        if i['geoip']['country_name'] in ["Taiwan", "Hong Kong", "Macao"]:
                            i['geoip']['country_name'] = "China " + i['geoip']['country_name']
            dct = data[ID]['forensics']
            data[ID]['forensics'] = dict_merge(dct, res)
        # added by wendong, compatible with version 3.3
        if config.UCSS_VERSION == 3.3:
            for k, v in data.items():
                graphs = v["forensics"]["graphs"]
                _graphs = copy.deepcopy(graphs)
                for i in _graphs["template"]:
                    if i["type"] == 1:
                        graphs["template"].remove(i)
                        continue
                    elif i["type"] == 2:
                        graphs["histCnt"] = get_histCnt(graphs["histCnt"], timestamp)
                    elif i["type"] == 3:
                        graphs["timeseries"] = [
                            item["key_as_string"] for item in graphs["timeseries"]
                        ]
        return data
    except APIDataNotFound:
        logger.debug("{}ScoresORM 404: doc_id={} timestamp={} ID={}".format(
            xrs.upper(), doc_id, timestamp, ID))
        return {}
    except Exception:
        logger.exception("{} {} {} {} {}\n".format(timestamp, deviceID, ID,
                                                   user, pageSize))
        return {}
def merge_topn(scores_dict, deviceID, pk, timestamp, size, histdays=1, at_least=None):
    # Per-feature filters that scope the top-N aggregation to the traffic
    # which produced the score.
    feature_must = {
        "http": [{"terms": {"channelType": [1, 2]}}],
        "domain": [{"terms": {"channelType": [1, 2]}}],
        "post": [{"terms": {"method.keyword": ["POST", "PUT", "PATCH", "DELETE"]}}],
        "get": [{"terms": {"method.keyword": ["GET", "HEAD", "OPTIONS"]}}],
        "ssl": [{"term": {"service.keyword": "ssl"}}],
        "udp": [{"term": {"proto.keyword": "udp"}}],
        "tcp": [{"term": {"proto.keyword": "tcp"}}],
        "icmp": [{"term": {"proto.keyword": "icmp"}}]
    }
    if pk == 'network':
        es = auto_client(pk, histdays + 1,
                         simple_datetime(timestamp, datetime.datetime).date())
        term_field = 'resp_h'
        sum_field = 'orig_ip_bytes'
    else:
        es = auto_client('swg', histdays + 1,
                         simple_datetime(timestamp, datetime.datetime).date())
        term_field = 'uriDomain.keyword'
        sum_field = 'sendBytes'
    _query = {
        "query": {
            "bool": {
                "must": [
                    {"term": {"deviceID.keyword": deviceID}},
                    {"term": {"userId.keyword": scores_dict['userId']}}
                ] + feature_must[scores_dict['feature']],
                "filter": [{
                    "range": {
                        "@timestamp": {
                            "gte": (timestamp - histdays * 3600 * 24) * 1000,
                            "lt": timestamp * 1000
                        }
                    }
                }]
            }
        },
        "size": 0,
        "aggs": {
            "tarinfo": {
                "terms": {"field": term_field, "size": size},
                "aggs": {
                    "bytes_sum": {"sum": {"field": sum_field}}
                }
            }
        }
    }
    scores_dict['tarinfo'] = es.aggregate(
        query=_query)['aggregations']['tarinfo']['buckets']
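
# Hypothetical usage sketch: merge_topn mutates scores_dict in place, attaching
# the top destinations (by summed bytes) under 'tarinfo'. The userId, device ID,
# and timestamp values below are placeholders; 'feature' must be one of the keys
# of feature_must above.
def _example_merge_topn():
    scores_dict = {"userId": "alice", "feature": "tcp"}
    merge_topn(scores_dict, deviceID="dev-001", pk="network",
               timestamp=1609459200, size=10)
    # Each bucket looks like {"key": <resp_h>, "doc_count": ..., "bytes_sum": {...}}.
    return scores_dict["tarinfo"]
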