Example 1
def get_datetime_list(esorm, deviceID, must_not_list, must_list, startTimestamp, endTimestamp):
    # limit the aggregation to the requested window (ES stores epoch millis)
    filter_list = [{"range": {"timestamp": {"gte": startTimestamp * 1000, "lte": endTimestamp * 1000}}}]
    # newest document timestamp inside the window, converted back to seconds
    max_t = int(esorm.field_aggs("max", "timestamp", deviceID, filter_list, must_not_list, must_list) / 1000)
    d1 = simple_datetime(startTimestamp, str, True)
    d2 = simple_datetime(endTimestamp, str, True)
    d3 = simple_datetime(max_t, str, True)
    # daily boundaries as strings, with " " swapped for "T"; the first boundary is dropped
    datetime_list = [i.replace(" ", "T") for i in date_range(d1, d2, normalize=True).astype(str).values.tolist()[1:]]
    # cap the last boundary at the newest actual document
    datetime_list[-1] = d3
    return datetime_list
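Every example below leans on a simple_datetime helper that is not shown. A minimal illustrative sketch of the assumed contract (the real helper may differ; the meaning of the third argument is a guess):

import datetime

def simple_datetime(value, out_type, full=True):
    """Hypothetical stand-in; `full` mirrors the unexplained third argument."""
    # None means "now"; an int/float is a Unix timestamp in seconds
    dt = (datetime.datetime.now() if value is None
          else datetime.datetime.fromtimestamp(value))
    if out_type is str:
        return dt.strftime("%Y-%m-%d %H:%M:%S")   # sliced to [:10] for dates
    if out_type is int:
        return int(dt.timestamp())
    return dt  # out_type is datetime.datetime in Example 13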
Example 2
def get_ars_forensics(pk, deviceID, ID, timestamp, user, pageSize, **kwargs):
    start = simple_datetime(timestamp, str, True)
    size = pageSize

    if pk:
        ids = [{
            '_id': get_doc_id(start, deviceID, i, user),
            '_index': 'ars_scores',
            '_type': '_doc'
        } for i in config.ARS_FORENSICS[pk]]
        res = get_logs_with_ids(ids)
        for scores_dict in res:
            if scores_dict:
                merge_topn(scores_dict, deviceID, pk, timestamp, size)
        data = {pk: res}

    else:
        data = {
            "ars": {
                "network":
                get_network_forensics(deviceID, ID, timestamp, user, pageSize,
                                      **kwargs)['network'],
                "protocols":
                get_protocols_forensics(deviceID, ID, timestamp, user,
                                        pageSize, **kwargs)['protocols']
            }
        }

    return data
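The mget path above only works because get_doc_id is deterministic for a given (start, deviceID, feature, user) tuple. The helper itself is not shown; a purely hypothetical sketch of such an ID scheme:

import hashlib

def get_doc_id(start, deviceID, feature, user):
    # hypothetical: any stable hash of the identifying fields would do
    raw = "|".join([start, deviceID, feature, user])
    return hashlib.md5(raw.encode("utf-8")).hexdigest()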
Example 3
    def get_xrs_df(xrs):
        df_xrs = pd.DataFrame({
            "sip": [],
            xrs + "_scores": []
        }).set_index("sip")
        df_xrs_list = []
        value = simple_datetime(xrs_timestamp, str, True)

        if xrs == 'mrs':
            try:
                xrs_scores = mrs_es.match_obj_or_404(field=field,
                                                     value=value,
                                                     deviceID=deviceID)
            except APIDataNotFound:
                return df_xrs
            except Exception:
                logger.exception('Unknown error!')
                return df_xrs

            for i in xrs_scores:
                # scores is a stringified dict; eval assumes trusted input
                # (ast.literal_eval would be the safer choice)
                df = pd.DataFrame(index=[i['_source']['policyID']],
                                  data=eval(i['_source']['scores']))
                df = df.T
                df.index.name = 'sip'
                df_xrs_list.append(df)
        else:
            if xrs == 'ars':
                df = get_df(xrs, deviceID, value, ars_es)
                df_xrs_list.append(df)
                df = get_df('eas', deviceID, value, eas_es)
            else:
                df = get_df(xrs, deviceID, value, ers_es)
            df_xrs_list.append(df)

        if df_xrs_list:
            if pd.__version__ < config.pd_old1_version:
                df_xrs = pd.concat(df_xrs_list, axis=1).fillna(0.2)
            else:
                df_xrs = pd.concat(df_xrs_list, axis=1, sort=False).fillna(0.2)

            if xrs == "ers":
                model_list = df_xrs.columns.tolist()
                threat_weight_list = get_threat_level(model_list)
                df_xrs[xrs + '_scores'] = df_xrs[model_list].mul(
                    threat_weight_list).max(axis=1)
            else:
                df_xrs[xrs + '_scores'] = df_xrs.max(axis=1)
        return df_xrs.round(1)
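For the ers branch, each model's scores are weighted by threat level and reduced to a per-row max. A tiny worked sketch with made-up numbers (model names and weights are placeholders):

import pandas as pd

df = pd.DataFrame({"m1": [8.0, 2.0], "m2": [6.0, 9.0]},
                  index=["10.0.0.1", "10.0.0.2"])
weights = [1.0, 0.5]                      # hypothetical threat weights
df["ers_scores"] = df[["m1", "m2"]].mul(weights).max(axis=1)
# 10.0.0.1 -> max(8.0*1.0, 6.0*0.5) = 8.0
# 10.0.0.2 -> max(2.0*1.0, 9.0*0.5) = 4.5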
Example 4
 def get_query(self):
     _query = {
         # "sort": {"@timestamp": {"order": "desc"}},
         "query": {
             "bool": {
                 "must": [{
                     "term": {
                         "local_orig": {
                             "value": "false"
                         }
                     }
                 }, {
                     "term": {
                         "local_resp": {
                             "value": "true"
                         }
                     }
                 }, {
                     "term": {
                         "proto.keyword": {
                             "value": "tcp"
                         }
                     }
                 }, {
                     "term": {
                         "deviceID.keyword": self.deviceID
                     }
                 }, {
                     "terms": {
                         "conn_state.keyword":
                         ["S1", "S2", "S3", "SF", "RSTO", "RSTR"]
                     }
                 }],
                 "must_not": [
                     {
                         "terms": {
                             "resp_h": self.white_list
                         }
                     },
                     {
                         "terms": {
                             "resp_p": self.ignore_ports
                         }
                     },
                 ],
                 "should": [],
                 "filter": [{
                     "range": {
                         "resp_p": {
                             "lt": 10000
                         }
                     }
                 }, {
                     "range": {
                         "@timestamp": {
                             "gte": simple_datetime(self.start_time, str,
                                                    True),
                             "lt": simple_datetime(self.end_time, str, True)
                         }
                     }
                 }]
             }
         },
         "size": 0,
         "aggs": {
             "three_features": {
                 "composite": {
                     "size":
                     self.config.MAX_AGGS,
                     "sources": [{
                         "userId": {
                             "terms": {
                                 "field": "userId.keyword"
                             }
                         }
                     }, {
                         "dip": {
                             "terms": {
                                 "field": "orig_h"
                             }
                         }
                     }, {
                         "dpt": {
                             "terms": {
                                 "field": "resp_p"
                             }
                         }
                     }]
                 }
             }
         }
     }
     return _query
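The composite aggregation above returns at most self.config.MAX_AGGS buckets per request; callers page through the rest with after_key. A hedged sketch of that paging loop (checker, es_client, the index name, and handle are all assumptions):

body = checker.get_query()          # checker: an instance of the class above
after_key = None
while True:
    if after_key:
        body["aggs"]["three_features"]["composite"]["after"] = after_key
    res = es_client.search(index="conn_logs", body=body)   # client/index assumed
    comp = res["aggregations"]["three_features"]
    for bucket in comp["buckets"]:
        handle(bucket["key"], bucket["doc_count"])         # hypothetical consumer
    after_key = comp.get("after_key")
    if after_key is None:
        break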
Example 5
 def get_query(self):
     _query = {
         # "sort": {"@timestamp": {"order": "desc"}},
         "query": {
             "bool": {
                 "must": [{
                     "term": {
                         "local_orig": {
                             "value": "false"
                         }
                     }
                 }, {
                     "term": {
                         "local_resp": {
                             "value": "true"
                         }
                     }
                 }, {
                     "term": {
                         "proto.keyword": {
                             "value": "tcp"
                         }
                     }
                 }, {
                     "term": {
                         "deviceID.keyword": self.deviceID
                     }
                 }, {
                     "terms": {
                         "conn_state.keyword":
                         ["S1", "S2", "S3", "SF", "RSTO", "RSTR"]
                     }
                 }],
                 "must_not": [
                     {
                         "terms": {
                             "resp_h": self.white_list
                         }
                     },
                     {
                         "terms": {
                             "resp_p": self.ignore_ports
                         }
                     },
                 ],
                 "should": [],
                 "filter": [{
                     "range": {
                         "@timestamp": {
                             "gte": simple_datetime(self.start_time, str,
                                                    True),
                             "lt": simple_datetime(self.end_time, str, True)
                         }
                     }
                 }]
             }
         },
         "size": 0,
         "aggs": {
             "ipterm": {
                 "terms": {
                     "field": "userId.keyword",
                     "min_doc_count": self.at_least,
                     "size": self.config.MAX_AGGS
                 },
                 "aggs": {
                     "dipterm": {
                         "terms": {
                             "field": "resp_p",
                             "size": self.config.MAX_AGGS
                         },
                         "aggs": {
                             "orig_h#cnt": {
                                 "cardinality": {
                                     "field": "orig_h"
                                 }
                             },
                             "scores_selector": {
                                 "bucket_selector": {
                                     "buckets_path": {
                                         "doc_count": "_count",
                                         "set_port": "_key",
                                         "orig_h_cnt": "orig_h#cnt"
                                     },
                                     "script":
                                     "(params.doc_count>10 && [4786, 161, 162, 1433, 3306, 1521, 1434, 69, 111, 123, 53].contains(params.set_port)) || (params.orig_h_cnt>10)"
                                 }
                             },
                             "_value": {
                                 "bucket_script": {
                                     "buckets_path": {
                                         "doc_count": "_count",
                                         "set_port": "_key",
                                         "orig_h_cnt": "orig_h#cnt"
                                     },
                                     "script":
                                     "params.orig_h_cnt>10?2:(params.doc_count>10 && [4786, 161, 162, 1433, 3306, 1521, 1434, 69, 111, 123, 53].contains(params.set_port))?1:0"
                                 }
                             }
                         }
                     },
                     "value": {
                         "max_bucket": {
                             "buckets_path": "dipterm>_value"
                         }
                     },
                     "scores_selector": {
                         "bucket_selector": {
                             "buckets_path": {
                                 "dipterm": "dipterm._bucket_count"
                             },
                             "script": "params.dipterm>0"
                         }
                     },
                     "_fill#dipterm#resp_p#key": {
                         "bucket_script": {
                             "buckets_path": {},
                             "script": "0"
                         }
                     }
                 }
             }
         }
     }
     return _query
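The painless scripts above encode a small decision rule; mirrored in Python purely as a readability aid, not as part of the query:

RISKY_PORTS = {4786, 161, 162, 1433, 3306, 1521, 1434, 69, 111, 123, 53}

def bucket_value(doc_count, set_port, orig_h_cnt):
    # mirrors the "_value" bucket_script: fan-in beats risky-port beats nothing
    if orig_h_cnt > 10:
        return 2
    if doc_count > 10 and set_port in RISKY_PORTS:
        return 1
    return 0

# the "scores_selector" keeps a bucket exactly when bucket_value(...) > 0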
Example 6
    def aggregate(self,
                  aggs=None,
                  start_datetime=None,
                  end_datetime=None,
                  deviceID=None,
                  must_list=None,
                  filter_list=None,
                  must_not_list=None,
                  group_id=None,
                  time_field="@timestamp",
                  time_include=("gte", "lt"),
                  query=None,
                  **kwargs):
        """
        Aggregate data, if use query, do func twice when group is flag.
        Search is not allowed in aggregate.If you need other query to
        use the query parameter.

        Parameters
        ----------
        start_datetime : datetime

        Note
        ----
        Sorting is not valid when groupid is empty.
        """
        def _search(query=None, **kwargs):
            res = self.es_client.search(index=self.index, body=query, **kwargs)
            return res

        if query is None:
            must_list = must_list or []
            filter_list = filter_list or []
            must_not_list = must_not_list or []
            aggs = aggs or {}
            start_time = simple_datetime(start_datetime, str, True)
            end_time = simple_datetime(end_datetime, str, True)
            query = {
                "query": {
                    "bool": {
                        "must": [{
                            "match_all": {}
                        }] + must_list,
                        "filter": [{
                            "range": {
                                time_field: {
                                    time_include[0]: start_time,
                                    time_include[1]: end_time
                                }
                            }
                        }, {
                            "term": {
                                "deviceID.keyword": deviceID
                            }
                        }] + filter_list,
                        "must_not":
                        must_not_list
                    }
                },
                "from": 0,
                "size": 0,
                "aggs": aggs
            }
            if group_id == "notset_group":
                # run twice: once grouped by userId, once by sourceIp
                # (the field name is swapped via a JSON round trip)
                query_userId = copy.deepcopy(query)
                query_sourceIp = copy.deepcopy(query)
                query_sourceIp_str = json.dumps(query_sourceIp).replace(
                    "userId.keyword", "sourceIp")
                query_sourceIp = json.loads(query_sourceIp_str)
                # userId pass: only docs that actually carry a userId
                query_userId["query"]["bool"]["must"] += [{
                    "exists": {
                        "field": "userId"
                    }
                }]
                res_userId = _search(query_userId, **kwargs)
                # sourceIp pass: only docs without a userId
                query_sourceIp["query"]["bool"]["must_not"] += [{
                    "exists": {
                        "field": "userId"
                    }
                }]
                query_sourceIp["query"]["bool"]["must_not"].remove(
                    query_sourceIp["query"]["bool"]["must_not"][-2])
                res_sourceIp = _search(query_sourceIp, **kwargs)
                res = concat_dict(res_userId, res_sourceIp)
            else:
                res = _search(query, **kwargs)
        else:
            res = _search(query, **kwargs)
        return res
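A hedged usage sketch of aggregate; the index name, device ID, and field names here are assumptions:

now_ts = simple_datetime(None, int)
esorm = ElasticsearchORM(index="conn_logs")   # index name assumed
res = esorm.aggregate(
    aggs={"by_user": {"terms": {"field": "userId.keyword", "size": 10}}},
    start_datetime=now_ts - 3600,             # last hour
    end_datetime=now_ts,
    deviceID="dev-01",
)
for b in res["aggregations"]["by_user"]["buckets"]:
    print(b["key"], b["doc_count"])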
Example 7
    def search(self,
               use_scan=False,
               start_datetime=None,
               end_datetime=None,
               deviceID=None,
               must_list=None,
               filter_list=None,
               must_not_list=None,
               group_id=None,
               source=None,
               use_from=None,
               use_sort=None,
               time_field="@timestamp",
               time_include=("gte", "lt"),
               query=None,
               **kwargs):
        """
        Search or scan docs. Behind the scenes this method calls search(…).
        If use query, do func twice when group is flag("notset_group").
        If you need other query to use the query parameter.
        """
        def _search(use_scan=False, query=None, **kwargs):
            if use_scan:
                res = scan(client=self.es_client,
                           index=self.index,
                           query=query,
                           **kwargs)
            else:
                res = self.es_client.search(index=self.index,
                                            body=query,
                                            **kwargs)
            return res

        if query is None:
            must_list = must_list or []
            filter_list = filter_list or []
            must_not_list = must_not_list or []
            start_time = simple_datetime(start_datetime, str, True)
            end_time = simple_datetime(end_datetime, str, True)
            query = {
                "query": {
                    "bool": {
                        "must": [{
                            "match_all": {}
                        }] + must_list,
                        "filter": [{
                            "range": {
                                time_field: {
                                    time_include[0]: start_time,
                                    time_include[1]: end_time
                                }
                            }
                        }, {
                            "term": {
                                "deviceID.keyword": deviceID
                            }
                        }] + filter_list,
                        "must_not":
                        must_not_list
                    }
                }
            }
            if source:
                query["_source"] = source
            if use_from:
                query["from"] = use_from
            if use_sort:
                query["sort"] = use_sort
        return _search(use_scan, query, **kwargs)
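And a matching sketch for search: use_scan streams every hit lazily, while the default path honours source, use_from, and use_sort (index name and field names below are assumptions):

esorm = ElasticsearchORM(index="conn_logs")   # index name assumed
now_ts = simple_datetime(None, int)
hits = esorm.search(
    use_scan=True,
    start_datetime=now_ts - 3600,
    end_datetime=now_ts,
    deviceID="dev-01",
    source=["userId", "resp_h"],   # trim _source to two fields
)
for hit in hits:                   # scan yields a generator of docs
    print(hit["_source"])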
Example 8
def get_risk_level(deviceID=None, xrs=None, field="timestamp"):
    ers_es = ERSResultsORM()
    mrs_es = MRSResultsORM()
    ars_es = ARSResultsORM()
    eas_es = EASResultsORM()

    es = ElasticsearchORM(
        index=['ars_results', 'mrs_results', 'ers_results', 'eas_results'])
    xrs_timestamp = int(
        es.field_aggs(mode='max', field=field,
                      deviceID=deviceID)) / 1000  # max time in es
    # to be used in the next version,
    # once the mrs struct is updated
    # query = {
    #     "_source": ["username", "scores", "policyID"],
    #     "query": {
    #         "bool": {
    #             "must": [{"term": {"deviceID.keyword": deviceID}}],
    #             "must_not": [{"term": {"scores": 0}}],
    #             "filter": [{"term": {"timestamp": simple_datetime(xrs_timestamp, str, True)}}]
    #         }
    #     }
    # }
    # test = es.search(True, query=query)
    # tf = pd.DataFrame(test)

    if simple_datetime(None, int) - xrs_timestamp > 24 * 60 * 60:
        raise APIBaseError('No scores within the last 24h (latest: {}).'.format(xrs_timestamp))

    # xrs_timestamp = 1608775200

    # @cache_value
    def get_df(xrs, deviceID, value, es):
        query = {
            "_source": ["username", "scores", "policyID"],
            "query": {
                "bool": {
                    "must": [{
                        "term": {
                            "deviceID.keyword": deviceID
                        }
                    }],
                    "must_not": [{
                        "term": {
                            "scores": 0
                        }
                    }],
                    "filter": [{
                        "term": {
                            "timestamp": value
                        }
                    }]
                }
            }
        }
        xrs_scores = es.search(True, query=query)
        df = pd.DataFrame(xrs_scores)
        size = df.index.size
        logger.info("xrs:{} size:{} ,if size is 0, pass error.".format(
            xrs, size))
        if size != 0:
            df = pd.DataFrame(df.loc[:, "_source"].values.tolist())
            try:
                df = df.set_index(["username", "policyID"]).unstack(level=1)
            except ValueError:
                df_cp = df.set_index(["username", "policyID"])
                mult_index = df_cp.index.values.tolist()
                cnt = Counter(mult_index)
                duplicates = [key for key in cnt.keys() if cnt[key] > 1]
                logger.error(
                    'ValueError: Index contains duplicate entries, cannot reshape with {}'
                    .format(duplicates))
                return pd.DataFrame()

            if xrs == 'eas':
                df.columns = 'endpoint_' + df.columns.droplevel()
            else:
                df.columns = df.columns.droplevel()
            df.columns.name = None
            df.index.name = 'sip'
        return df

    def get_threat_level(model_list, fast_path=False):
        ers_models = merge_models(deviceID)
        threat_level_map = {
            i["modelID"]: i["threat_level"]
            for i in ers_models["ersModels"]
        }
        threat_level_list = [threat_level_map.get(i, 4) for i in model_list]
        if fast_path:
            return threat_level_list
        threat_weight_list = [
            config.threat_level_mapping.get(i) for i in threat_level_list
        ]
        return threat_weight_list

    @cache_value
    def get_xrs_df(xrs):
        df_xrs = pd.DataFrame({
            "sip": [],
            xrs + "_scores": []
        }).set_index("sip")
        df_xrs_list = []
        value = simple_datetime(xrs_timestamp, str, True)

        if xrs == 'mrs':
            try:
                xrs_scores = mrs_es.match_obj_or_404(field=field,
                                                     value=value,
                                                     deviceID=deviceID)
            except APIDataNotFound:
                return df_xrs
            except Exception:
                logger.exception('Unknown error!')
                return df_xrs

            for i in xrs_scores:
                # scores is a stringified dict; eval assumes trusted input
                # (ast.literal_eval would be the safer choice)
                df = pd.DataFrame(index=[i['_source']['policyID']],
                                  data=eval(i['_source']['scores']))
                df = df.T
                df.index.name = 'sip'
                df_xrs_list.append(df)
        else:
            if xrs == 'ars':
                df = get_df(xrs, deviceID, value, ars_es)
                df_xrs_list.append(df)
                df = get_df('eas', deviceID, value, eas_es)
            else:
                df = get_df(xrs, deviceID, value, ers_es)
            df_xrs_list.append(df)

        if df_xrs_list:
            if pd.__version__ < config.pd_old1_version:
                df_xrs = pd.concat(df_xrs_list, axis=1).fillna(0.2)
            else:
                df_xrs = pd.concat(df_xrs_list, axis=1, sort=False).fillna(0.2)

            if xrs == "ers":
                model_list = df_xrs.columns.tolist()
                threat_weight_list = get_threat_level(model_list)
                df_xrs[xrs + '_scores'] = df_xrs[model_list].mul(
                    threat_weight_list).max(axis=1)
            else:
                df_xrs[xrs + '_scores'] = df_xrs.max(axis=1)
        return df_xrs.round(1)

    @catch_except({"defaults": 0.20, "order": [], "scores": {}}, inlog=True)
    def get_riskLevel():
        df_ars = get_xrs_df('ars')
        df_mrs = get_xrs_df('mrs')
        df_ers = get_xrs_df('ers')
        df = pd.concat([
            df_ars[['ars_scores']], df_mrs[['mrs_scores']],
            df_ers[['ers_scores']]
        ],
                       axis=1).fillna(0.2).replace(0, 0.2)
        if not df.empty:
            df['scores'] = df.apply(_risk_level, axis=1)
            risk_data = {"defaults": 1, "timestamp": xrs_timestamp}
            risk_data['scores'] = df[['scores']].to_dict("index")
            return risk_data

    @catch_except({"defaults": 0.20, "order": [], "scores": {}}, inlog=True)
    def get_arsScores():
        df_ars = get_xrs_df('ars')
        ars_data = {
            "defaults": 0.20,
            "timestamp": xrs_timestamp,
            "order": [],
            "feature_order": [],
            "feature_mode": "max",
            "scores": {}
        }
        un_columns = ['network', 'protocols', 'events', 'endpoint']
        ars_data['order'] = un_columns
        for col_name in un_columns:
            selector = df_ars.columns[df_ars.columns.str.contains(
                col_name)].astype(str)
            ars_data['feature_order'].append(
                [i.split('_')[-1] for i in selector.tolist()])
            if pd.__version__ < config.pd_old1_version:
                df_ars[col_name] = df_ars.apply(
                    lambda x: [x[i] for i in x.keys() if col_name in i],
                    axis=1)
            else:
                df_ars[col_name] = df_ars[selector].apply(
                    lambda x: x.tolist(),
                    axis=1).apply(lambda x: x if isinstance(x, list) else [])

        df_ars = df_ars[un_columns]
        df_ars['scores'] = df_ars.apply(lambda x: [[i for i in x]],
                                        axis=1).apply(lambda x: x[0])
        ars_data['scores'] = df_ars[['scores']].to_dict('index')
        return ars_data

    @catch_except({"defaults": 0.20, "order": [], "scores": {}}, inlog=False)
    def get_xrsScores(xrs):
        df_xrs = get_xrs_df(xrs)
        xrs_data = {
            "defaults": 0.20,
            "timestamp": xrs_timestamp,
            "order": [],
            "scores": {}
        }
        df_xrs = df_xrs.drop([xrs + '_scores'], axis=1)
        model_list = df_xrs.columns.tolist()
        xrs_data['order'] = model_list
        if xrs == "ers":
            xrs_data['threat_level'] = get_threat_level(model_list, True)
        df_xrs['scores'] = df_xrs.apply(
            lambda x: [[i for i in x]],
            axis=1).apply(lambda x: x[0] if len(x) == 1 else x)
        xrs_data['scores'] = df_xrs[['scores']].to_dict('index')
        return xrs_data

    if xrs == 'riskLevel':
        data = get_riskLevel()

    elif xrs == 'arsScores':
        data = get_arsScores()

    elif xrs == 'ersScores' or xrs == 'mrsScores':
        data = get_xrsScores(xrs[:3])

    elif xrs is None:
        data = get_riskLevel(), get_arsScores(), get_xrsScores(
            'ers'), get_xrsScores('mrs')
    return data
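A hedged sketch of the dispatch (the deviceID is a placeholder): each xrs value selects one view, and None returns all four as a tuple:

risk = get_risk_level(deviceID="dev-01", xrs="riskLevel")   # combined scores per sip
ars = get_risk_level(deviceID="dev-01", xrs="arsScores")    # ars feature breakdown
ers = get_risk_level(deviceID="dev-01", xrs="ersScores")    # per-model ers scores
all_views = get_risk_level(deviceID="dev-01")               # 4-tuple of the above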
Example 9
 def post(self, deviceID, paramID=None):
     data = defaultdict(dict)
     data_pre = request.data
     data_pre['deviceID'] = deviceID
     serializer = EASSerializer(data=data_pre)
     serializer.is_valid(raise_exception=True)
     kwargs = serializer.data
     startTimestamp = kwargs['startTimestamp']
     endTimestamp = kwargs['endTimestamp']
     size = kwargs.pop('size')
     from_size = kwargs.pop('from_size')
     must_list = kwargs.pop('must_list')
     must_not_list = [{"term": {"scores": 0}}]
     if paramID:
         must_list += [{"term": {"anomalyID.keyword": paramID}}]
         anomaly = get_anomaly(deviceID, paramID)
         data[paramID]['name'] = anomaly['name']
     else:
         paramID = "_all"
         anomalies = get_anomaly(deviceID, paramID)
         data[paramID]['name'] = {"en": "All anomaly", "zh": "全部异常"}
     esorm = AnomaliesScoresORM()
     datetime_list = get_datetime_list(esorm, deviceID, must_not_list, must_list, **kwargs)
     min_bound_date = simple_datetime(startTimestamp, str, True)[:10]
     max_bound_date = simple_datetime(endTimestamp - 3600 * 24, str, True)[:10]
     demo = {
         "hits": 0,
         "top": {
             "abnormalHits": [],
             "abnormalScores": []
         },
         "histogram": {
             "abnormalHits": []
         },
     }
     data[paramID].update(demo)
     query = {
         "size": size,
         "from": from_size,
         "_source": ["username", "scores"],
         "sort": [
             {"scores": {"order": "desc"}}
         ],
         "query": {
             "bool": {
                 "must": must_list,
                 "filter": [
                     {"terms": {"timestamp": datetime_list}},
                     {"term": {"deviceID.keyword": deviceID}}
                 ],
                 "must_not": must_not_list
             }
         },
         "aggs": {
             "count_anomaly": {
                 "terms": {
                     "field": "anomalyID.keyword",
                     "size": config.MAX_AGGS
                 },
                 "aggs": {
                     "histinfo": {
                         "date_histogram": {
                             "field": "timestamp",
                             "interval": "day",
                             "extended_bounds": {
                                 "min": min_bound_date,
                                 "max": max_bound_date
                             },
                             "min_doc_count": 0,
                             "format": "yyyy-MM-dd"
                         },
                         "aggs": {
                             "clone_count": {
                                 "cumulative_sum": {
                                     "buckets_path": "_count"
                                 }
                             }
                         }
                     }
                 }
             },
             "top_hits": {
                 "terms": {
                     "field": "username.keyword",
                     "size": config.TOP_SIZA,
                     "show_term_doc_count_error": True
                 }
             },
             "top_scores": {
                 "terms": {
                     "field": "username.keyword",
                     "size": config.TOP_SIZA,
                     "show_term_doc_count_error": True,
                     "order": [
                         {"max_scores": "desc"}
                     ]
                 },
                 "aggs": {
                     "max_scores": {
                         "max": {
                             "field": "scores"
                         }
                     }
                 }
             }
         }
     }
     res = esorm.aggregate(query=query)
     if res["hits"]["total"] != 0:
         data[paramID]["hits"] = res["hits"]["total"]
         acab = res["aggregations"]["count_anomaly"]["buckets"]
         athb = res["aggregations"]["top_hits"]["buckets"]
         atsb = res["aggregations"]["top_scores"]["buckets"]
         data[paramID]["histogram"]["abnormalHits"] = sum([[j["doc_count"] for j in i["histinfo"]["buckets"]] for i in acab], axis=0)  # .tolist()
         data[paramID]["top"]["abnormalHits"] = [{"username": i["key"], "hits": i["doc_count"]} for i in athb[0:size]]
         data[paramID]["top"]["abnormalScores"] = [{"username": i["key"], "scores": i["max_scores"]["value"]} for i in atsb[0:size]]
         if paramID == "_all":
             data[paramID]["anomalies_hits"] = [{"anomalyID": i["key"], "hits": i["doc_count"], "name": anomalies[i["key"]]["name"]} for i in acab]
     return Response(data)
Example 10
 def get_query(self):
     _query = {
         # "sort": {"@timestamp": {"order": "desc"}},
         "query": {
             "bool": {
                 "must": [
                     {"term": {"local_orig": {"value": "false"}}},
                     {"term": {"local_resp": {"value": "true"}}},
                     {"term": {"proto.keyword": {"value": "tcp"}}},
                     {"term": {"deviceID.keyword": self.deviceID}},
                     {"terms": {"conn_state.keyword": ["S1", "S2", "S3", "SF", "RSTO", "RSTR"]}}
                 ],
                 "must_not": [
                     {"terms": {"resp_h": self.white_list}},
                     {"terms": {"resp_p": self.ignore_ports}},
                 ],
                 "should": [
                 ],
                 "filter": [
                     {"range": {"resp_p": {"lt": 10000}}},
                     {
                         "range": {
                             "@timestamp": {
                                 "gte": simple_datetime(self.start_time, str, True),
                                 "lt": simple_datetime(self.end_time, str, True)
                             }
                         }
                     }
                 ]
             }
         },
         "size": 0,
         "aggs": {
             "ipterm": {
                 "terms": {
                     "field": "userId.keyword",
                     "min_doc_count": self.at_least[0],
                     "size": self.config.MAX_AGGS
                 },
                 "aggs": {
                     "dipterm": {
                         "terms": {
                             "field": "resp_h",
                             "size": self.config.MAX_AGGS
                         },
                         "aggs": {
                             "resp_p#cnt": {
                                 "cardinality": {
                                     "field": "resp_p"
                                 }
                             },
                             "orig_p#cnt": {
                                 "cardinality": {
                                     "field": "orig_p"
                                 }
                             },
                             "scores_selector": {
                                 "bucket_selector": {
                                     "buckets_path": {
                                         "resp_pcnt": "resp_p#cnt",
                                         "orig_pcnt": "orig_p#cnt"
                                     },
                                     "script": "params.resp_pcnt>{} && params.orig_pcnt>{}".format(*self.at_least)
                                 }
                             }
                         }
                     },
                     "scores_selector": {
                         "bucket_selector": {
                             "buckets_path": {
                                 "dipterm": "dipterm._bucket_count"
                             },
                             "script": "params.dipterm>0"
                         }
                     },
                     "value": {
                         "bucket_script": {
                             "buckets_path": {},
                             "script": "1"
                         }
                     },
                     "_fill#dipterm#orig_h#key": {
                         "bucket_script": {
                             "buckets_path": {
                             },
                             "script": "0"
                         }
                     },
                     "count": {
                         "sum_bucket": {
                             "buckets_path": "dipterm._count"
                         }
                     }
                 }
             }
         }
     }
     return _query
Example 11
 def validate_timestamp(self, timestamp):
     time_deviation = config.TIME_DEVIATION
     t_now = simple_datetime(None, int)
     if not t_now - time_deviation < timestamp < t_now + time_deviation:
         raise APIINPUTERROR(_('Invalid timestamp.'))
     return timestamp
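A quick illustrative check of the rule (serializer stands for any instance of the class above; TIME_DEVIATION is whatever config defines):

now = simple_datetime(None, int)
serializer.validate_timestamp(now)           # within the window: returned as-is
serializer.validate_timestamp(now - 10**9)   # far in the past: raises APIINPUTERROR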
Example 12
def get_anomaly_forensics(deviceID=None,
                          ID=None,
                          temp=None,
                          xrs=None,
                          user=None,
                          pageSize=None,
                          timestamp=None):
    try:
        data = {}
        es_orm = AnomaliesScoresORM()
        start = simple_datetime(timestamp, str, True)
        doc_id = get_doc_id(start, deviceID, ID, user)
        res = es_orm.get_obj_or_404(doc_id=doc_id)

        if res.get("scores", 1) != 0:
            if temp is None:
                if xrs == 'eas':
                    temp = get_anomaly(ID)
                elif xrs == 'ers':
                    temp = get_ers_models(deviceID)['params'][ID]
                else:
                    raise Exception

            data[ID] = temp
            data[ID]['scores'] = res.get('scores', -1)
            from_ = (pageSize - 1) * 5

            # index/query are only produced on the no-precomputed-logs path;
            # default them so the check below cannot raise NameError
            index, query = None, {}
            if res['details'].get('logs'):
                size = pageSize * 5
                log_size = len(res["details"]["logs"])
                ids = res['details']['logs'][from_:size]
                res['details']['logs'] = get_logs_with_ids(ids)
                res['details']['size'] = log_size
            else:
                index = res['details'].pop('index', None)
                index_list = res['details'].pop('index_list', None)
                query = res['details'].pop('query', {})
                index = index_list or index

            if index and query != {}:
                size = 5
                res['details']['logs'], res['details'][
                    'size'] = get_logs_with_query(index, query, from_, size)

                if 'display' in temp and xrs == 'eas' and 'agg_query' in temp[
                        'forensics']['graphs']['template'][
                            0]:  # short-circuits: an anomalyID with no graphs would raise here
                    aggs_querys = {}
                    for graph in temp['forensics']['graphs']['template']:
                        aggs_querys.update(graph['agg_query'])

                    _query = json.loads(query)
                    _query['aggs'] = aggs_querys

                    graphs_values = ElasticsearchORM(index).search(
                        query=_query)['aggregations']
                    remove_buckets(graphs_values)
                    res['graphs'] = graphs_values

                if ID in [
                        "23787c99-4b94-4514-a38e-f753b8f47e57",
                        "c91dd8fa-af7f-11e9-a5a5-144f8a006a90"
                ]:
                    for i in res['details']['logs']:
                        if "geoip" in i:
                            if i['geoip']['country_name'] in [
                                    "Taiwan", "Hong Kong", "Macao"
                            ]:
                                i['geoip']['country_name'] = "China " + i[
                                    'geoip']['country_name']

            dct = data[ID]['forensics']
            data[ID]['forensics'] = dict_merge(dct, res)

        # added by wendong, compatible with version 3.3
        if config.UCSS_VERSION == 3.3:
            for k, v in data.items():
                graphs = v["forensics"]["graphs"]
                # process inside the loop so an empty data dict is a no-op
                _graphs = copy.deepcopy(graphs)
                for i in _graphs["template"]:
                    if i["type"] == 1:
                        graphs["template"].remove(i)
                        continue
                    elif i["type"] == 2:
                        graphs["histCnt"] = get_histCnt(graphs["histCnt"],
                                                        timestamp)
                    elif i["type"] == 3:
                        graphs["timeseries"] = [
                            item["key_as_string"]
                            for item in graphs["timeseries"]
                        ]

        return data

    except APIDataNotFound:
        logger.debug(
            "{}ScoresORM 404 doc_id:{} timestamp:{} ID:{} user:{}".format(
                xrs.upper(), doc_id, timestamp, ID, user))
        return {}

    except Exception:
        logger.exception("{} {} {} {} {}\n".format(timestamp, deviceID, ID,
                                                   user, pageSize))
        return {}
Example 13
def merge_topn(scores_dict,
               deviceID,
               pk,
               timestamp,
               size,
               histdays=1,
               at_least=None):
    feature_must = {
        "http": [{
            "terms": {
                "channelType": [1, 2]
            }
        }],
        "domain": [{
            "terms": {
                "channelType": [1, 2]
            }
        }],
        "post": [{
            "terms": {
                "method.keyword": ["POST", "PUT", "PATCH", "DELETE"]
            }
        }],
        "get": [{
            "terms": {
                "method.keyword": ["GET", "HEAD", "OPTIONS"]
            }
        }],
        "ssl": [{
            "term": {
                "service.keyword": "ssl"
            }
        }],
        "udp": [{
            "term": {
                "proto.keyword": "udp"
            }
        }],
        "tcp": [{
            "term": {
                "proto.keyword": "tcp"
            }
        }],
        "icmp": [{
            "term": {
                "proto.keyword": "icmp"
            }
        }]
    }
    if pk == 'network':
        es = auto_client(pk, histdays + 1,
                         simple_datetime(timestamp, datetime.datetime).date())
        term_field = 'resp_h'
        sum_field = 'orig_ip_bytes'
    else:
        es = auto_client('swg', histdays + 1,
                         simple_datetime(timestamp, datetime.datetime).date())
        term_field = 'uriDomain.keyword'
        sum_field = 'sendBytes'

    _query = {
        "query": {
            "bool": {
                "must": [{
                    "term": {
                        "deviceID.keyword": deviceID
                    }
                }, {
                    "term": {
                        "userId.keyword": scores_dict['userId']
                    }
                }] + feature_must[scores_dict['feature']],
                "filter": [{
                    "range": {
                        "@timestamp": {
                            "gte": (timestamp - histdays * 3600 * 24) * 1000,
                            "lt": timestamp * 1000
                        }
                    }
                }]
            }
        },
        "size": 0,
        "aggs": {
            "tarinfo": {
                "terms": {
                    "field": term_field,
                    "size": size
                },
                "aggs": {
                    "bytes_sum": {
                        "sum": {
                            "field": sum_field
                        }
                    }
                }
            }
        }
    }

    scores_dict['tarinfo'] = es.aggregate(
        query=_query)['aggregations']['tarinfo']['buckets']
    return
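merge_topn mutates scores_dict in place, attaching the top-N targets under 'tarinfo'. A hedged call sketch (all values are placeholders):

scores_dict = {"userId": "alice", "feature": "http"}
merge_topn(scores_dict, deviceID="dev-01", pk="network",
           timestamp=1700000000, size=5)
# scores_dict["tarinfo"] now holds up to 5 terms buckets keyed by resp_h,
# each carrying a bytes_sum sub-aggregation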