Ejemplo n.º 1
0
    def _format_data(self, data, function):
        """ Private method, format data obtained after a mongo aggregate.

        For date-based functions, the ``name`` of every row (a dict of date
        parts) is turned into a timezone-aware UTC ``datetime`` truncated to
        ``self.date_accuracy``. For any other function ``data`` is returned
        untouched.

        :param data: data obtained after aggregation; for date-based functions
            each row is read as ``{'name': {'year': ..., 'month': ..., ...},
            'value': ...}`` (plus ``name['series']`` for multiple series)
        :param function: function name of current reporting_dicts
        :return: ``data`` unchanged when the function is not date based;
            otherwise a list of ``{'name': datetime, 'value': ...}`` dicts, or
            a dict mapping each series key to such a list when the function
            is also "multiple_series"
        """
        if not type_of_dict(function, "date_based"):
            return data

        multiple_series = type_of_dict(function, "multiple_series")

        # Date parts (after year and month) that are significant for each
        # accuracy. Any other accuracy falls back to the first day of the
        # month, exactly like the former final ``else`` branch.
        parts_by_accuracy = {
            "minute": ('day', 'hour', 'minute'),
            "hour": ('day', 'hour'),
            "day": ('day',),
        }
        parts = parts_by_accuracy.get(self.date_accuracy, ())

        series = []
        m_series = {}
        for row in data:
            name = row['name']
            # ``datetime`` requires a day, hence the ``[1]`` fallback for the
            # month accuracy; omitted time fields default to 0.
            day_and_time = [name[part] for part in parts] or [1]
            date = datetime.datetime(name['year'], name['month'],
                                     *day_and_time)
            date = date.replace(tzinfo=pytz.UTC)
            result = dict(name=date, value=row["value"])

            if multiple_series:
                # Group rows by series without relying on a bare ``except``
                # to detect the first row of a series.
                m_series.setdefault(name['series'], []).append(result)
            else:
                series.append(result)

        return m_series if multiple_series else series
Ejemplo n.º 2
0
    def _check_args(self, attr, function):
        """ Private method, adds more precise columns according to request's timedelta.

        For date-based functions, ``self._set_date_accuracy()`` is called and
        the ``_id`` of the ``$group`` stage is extended in place with the date
        parts matching ``self.date_accuracy`` ('minute', 'hour' or 'day'; any
        other accuracy leaves the stage untouched).

        :param attr: attributes for mongo (list of pipeline stages); the
            ``$group`` stage is looked up at index 1, then at index 2
        :param function: function name of current reporting_dicts
        :returns: attribute filters for mongo query (the same ``attr`` list)
        """
        if type_of_dict(function, "date_based"):
            self._set_date_accuracy()

            # The $group stage is the second stage, or the third one when
            # another stage precedes it.
            try:
                group = attr[1]['$group']
            except (IndexError, KeyError):
                group = attr[2]['$group']

            group_id = group['_id']
            day = {'$dayOfMonth': '$time'}
            hour = {'$hour': '$time'}

            # Key insertion order below matches the original sequential
            # ``update`` calls.
            if self.date_accuracy == 'minute':
                group_id.update({'minute': {'$minute': '$time'},
                                 'hour': hour,
                                 'day': day})

            elif self.date_accuracy == 'hour':
                group_id.update({'day': day, 'hour': hour})

            elif self.date_accuracy == 'day':
                group_id.update({'day': day})

        return attr
Ejemplo n.º 3
0
    def aggregate(self, params):
        """ Public method used to fetch data to build histogram or lines for echarts
        It counts the number of hits and group by desired field over the desired period of time

        Sets ``self.startDate``, ``self.endDate``, ``self.delta`` and
        ``self.report_dict`` from ``params`` before querying.

        :param params: Dict; the keys read here are 'startDate', 'endDate',
            'reporting_type' and 'type_logs' (the whole dict is also handed to
            ``_prepare_filter_search``)
        :return: dict mapping every function name of the selected report to
            its formatted data
        :raises ClientLogException: on any failure, including Elasticsearch errors
        """
        try:
            self.startDate = parse(params['startDate']).astimezone(pytz.utc)
            self.endDate = parse(params['endDate']).astimezone(pytz.utc)

            self.delta = self.endDate - self.startDate
            self.report_dict = reporting_dicts[params['reporting_type']]

            index = self._prepare_index_name(params['type_logs'], self.startDate, self.endDate)
            result = {}
            for function, function_dict in self.report_dict.items():
                try:
                    search = self._prepare_filter_search(function_dict['elastic'], function, params)
                    data = self.els.search(index=index, body=search, ignore_unavailable=True)
                    result[function] = self._format_data(data, function)
                except ElasticsearchException:
                    logger.error("An error occurred during ElasticSearch aggregation {}".format(function), exc_info=True)
                    raise ClientLogException("An error occurred with ElasticsearchClient")
            return result
        except ClientLogException:
            # Already logged where it was raised: let it through instead of
            # logging it a second time and wrapping it in an identical
            # exception.
            raise
        except Exception:
            logger.error("An error occurred with ElasticSearch aggregate()", exc_info=True)
            raise ClientLogException("An error occurred with ElasticsearchClient")
Ejemplo n.º 4
0
    def aggregate(self, params):
        """ Public method used to fetch data to build histogram or lines for echarts
        It counts the number of hits and group by desired field over the desired period of time

        Sets ``self.startDate``, ``self.endDate``, ``self.delta`` and
        ``self.report_dict`` from ``params`` before querying.

        :param params: Dict; the keys read here are 'type_logs', 'startDate',
            'endDate' and 'reporting_type' (the whole dict is also handed to
            ``_prepare_filter_match_daterange``)
        :return: dict mapping every function name of the selected report to
            its formatted data, or None when no collection is available for
            ``params['type_logs']``
        :raises ClientLogException: on any failure, including PyMongo errors
        """
        # NOTE(review): this relies on the truth value of whatever
        # ``_get_collection`` returns; if that is a pymongo Collection, an
        # explicit ``is None`` comparison may be required - confirm.
        log_collection = self._get_collection(params['type_logs'])
        if not log_collection:
            return None

        try:
            self.startDate = parse(params['startDate']).astimezone(pytz.utc)
            self.endDate = parse(params['endDate']).astimezone(pytz.utc)

            self.delta = self.endDate - self.startDate
            self.report_dict = reporting_dicts[params['reporting_type']]

            date_filter = self._prepare_filter_match_daterange(params)
            data = {}

            for function, function_dict in self.report_dict.items():
                try:
                    attr_filter = [date_filter]
                    for key, value in function_dict['mongo'].items():
                        # Keys containing "Pip" are reduced to the part that
                        # precedes it.
                        if "Pip" in key:
                            key = key.split("Pip")[0]

                        # Deep copy so that the stages can be modified by
                        # _check_args without touching the report definition.
                        attr_filter.append(copy.deepcopy({key: value}))

                    filters = self._check_args(attr_filter, function)
                    data[function] = self._format_data(
                        log_collection.aggregate(filters), function)
                except PyMongoError:
                    logger.error(
                        "An error occurred during MongodbClient aggregation {}"
                        .format(function),
                        exc_info=True)
                    raise ClientLogException(
                        "An error occurred with Mongodb Client")
            return data
        except ClientLogException:
            # Already logged where it was raised: let it through instead of
            # logging it again and wrapping it in an identical exception.
            raise
        except Exception:
            logger.exception("An error occurred on MongodbClient aggregate()")
            raise ClientLogException("An error occurred with Mongodb Client")
Ejemplo n.º 5
0
    def merge_aggregations(self, aggregation, previous_results):
        """ Public method, allow to merge a previous aggregation with a new one (either mongo or els)
            only called when multiple repos are used for reporting. Look over new results to find results with the
            same name in the previous results. If found, merge values according to function's type (sum, avg, concat
            etc...) and replace previous result or append merged result in previous results

        ``previous_results`` is modified in place and returned.

        :param aggregation: new results of an aggregation
        :param previous_results: previous results of an aggregation; must
            already hold an entry for every function of ``aggregation``
        :returns: merged results (the ``previous_results`` object)
        :raises KeyError: if a function of ``aggregation`` is missing from
            ``previous_results``
        """
        for function, values in aggregation.items():
            previous = previous_results[function]
            date_based = type_of_dict(function, "date_based")
            multiple_series = type_of_dict(function, "multiple_series")
            is_average = type_of_dict(function, "average")

            if date_based and multiple_series:
                # Both sides are dicts mapping a series key to a list of
                # {"name": date, "value": ...} points.
                for key, value in values.items():
                    if key not in previous:
                        previous[key] = value
                        continue

                    previous_series = previous[key]
                    for entry in value:
                        index = self._index_of_first_match(previous_series, 'name', entry['name'])
                        if index is None:
                            previous_series.append(entry)
                        else:
                            previous_series[index] = self._merge_simple_entry(
                                previous_series[index], entry, is_average)

                    previous_series.sort(key=lambda item: item["name"])

            else:
                for entry in values:
                    index = self._index_of_first_match(previous, 'name', entry['name'])
                    if index is None:
                        previous.append(entry)
                        continue

                    existing = previous[index]

                    if is_average:
                        merged = self._merge_simple_entry(existing, entry, True)

                    elif function == "reputation_tags":
                        ips = existing["ips"]
                        self._merge_counted_items(ips, entry["ips"], 'ip')
                        ips.sort(key=lambda item: item["value"], reverse=True)
                        # Only the 20 IPs with the highest count are kept.
                        merged = {"name": entry['name'],
                                  "value": existing['value'] + entry['value'],
                                  "ips": ips[:20]}

                    elif multiple_series:
                        self._merge_counted_items(existing["value"], entry["value"], 'name')
                        merged = {"name": entry['name'], "value": existing['value']}

                    else:
                        merged = self._merge_simple_entry(existing, entry, False)

                    previous[index] = merged

                if date_based:
                    previous.sort(key=lambda item: item["name"])

                elif type_of_dict(function, "order_count"):
                    previous.sort(key=lambda item: item["value"], reverse=True)

        return previous_results

    @staticmethod
    def _index_of_first_match(items, field, wanted):
        """ Return the index of the first item of ``items`` whose ``field`` equals ``wanted``, or None.

        A linear scan is used on purpose: the compared values are not
        guaranteed to be hashable.
        """
        for index, item in enumerate(items):
            if item[field] == wanted:
                return index
        return None

    @staticmethod
    def _merge_simple_entry(existing, entry, is_average):
        """ Return a new ``{"name", "value"}`` dict combining two entries with the same name.

        Values are summed, or replaced by the mean of the two values when
        ``is_average`` is true.
        """
        # NOTE(review): the mean of two values is unweighted; merging more
        # than two sources of averages will not give the overall mean.
        if is_average:
            value = (entry['value'] + existing['value']) / 2
        else:
            value = existing['value'] + entry['value']
        return {"name": entry['name'], "value": value}

    def _merge_counted_items(self, target, additions, field):
        """ Merge ``additions`` into the list ``target`` in place.

        Items are matched on ``field``; values of matching items are summed,
        unmatched items are appended.
        """
        for addition in additions:
            index = self._index_of_first_match(target, field, addition[field])
            if index is None:
                target.append(addition)
            else:
                target[index] = {field: addition[field],
                                 "value": addition['value'] + target[index]["value"]}
Ejemplo n.º 6
0
    def fill_data(self, results):
        """ Public method, post-process data obtained after mongo or els aggregate.

        Depending on the function name / type:

        - date-based functions: entries with 0 as value are inserted between
          aggregated points (and after the last one, up to ``self.endDate``)
          for reporting accuracy;
        - "owasp_top10": for every serie, row values are summed per category
          "A1".."A10";
        - "blocked_requests": ``key=number`` pairs found in each row name are
          turned into per-category totals, also stored under "security_radar";
        - "UA_based": replaced by "browser_UA" and "os_UA", the row values
          summed per browser family and per OS family.

        Any other function is left as it is.

        :param results: dict mapping function names to data obtained after
            aggregation (not modified; a shallow copy is filled)
        :return: dict of filled results
        """
        filled_results = results.copy()

        for function, values in results.items():
            date_based = type_of_dict(function, "date_based")

            if date_based and type_of_dict(function, "multiple_series"):
                filled_results[function] = {
                    key: self._fill_date_gaps(series)
                    for key, series in values.items()
                }

            elif date_based:
                filled_results[function] = self._fill_date_gaps(values)

            elif function == "owasp_top10":
                filled_series = []
                for serie in values:
                    counters = {"A1": 0, "A2": 0, "A3": 0, "A4": 0, "A5": 0,
                                "A6": 0, "A7": 0, "A8": 0, "A9": 0, "A10": 0}
                    for row in serie["value"]:
                        # Strip brackets, quotes and backslashes from the
                        # row name, then split it on commas. The opening
                        # bracket is escaped to avoid the "nested set"
                        # FutureWarning of the re module.
                        categories = re.sub(r'[\[\]"\\]', "", row["name"]).split(",")
                        for category in categories:
                            counters[category] += row["value"]

                    filled_series.append({"name": serie["name"], "value": counters})

                filled_results[function] = filled_series

            elif function == "blocked_requests":
                # The totals are always built, even without rows: a former
                # ``len(data) > 0`` guard was dropped because it produced
                # empty reports (see the original note by Bonomani about the
                # Python 2 to 3 migration).
                series = {"SQLI": 0, "XSS": 0, "CSRF": 0, "Evade": 0, "Traversal": 0, "RFI": 0,
                          "LFI": 0, "RCE": 0, "PHPI": 0, "HTTP": 0, "SESS": 0}
                for row in values:
                    pairs = re.findall(r'\b(\w+)\b\=(?:\b(\w+)\b)?', row["name"])
                    for category, weight in pairs:
                        try:
                            series[category] += row["value"] * int(weight)
                        except (KeyError, ValueError, TypeError):
                            # Unknown category or missing / non numeric
                            # weight: the pair is ignored on purpose.
                            continue

                # Both keys share the same dict object.
                filled_results[function] = series
                filled_results["security_radar"] = series

            elif function == "UA_based":
                series_browser, series_os = {}, {}
                try:
                    from ua_parser import user_agent_parser

                    for row in values:
                        parsed_ua = user_agent_parser.Parse(row["name"])

                        browser = parsed_ua['user_agent']['family']
                        series_browser[browser] = series_browser.get(browser, 0) + row["value"]

                        os_family = parsed_ua['os']['family']
                        series_os[os_family] = series_os.get(os_family, 0) + row["value"]

                except ImportError:
                    series_browser = series_os = "Import not found: UA Agent Parser"

                filled_results["browser_UA"] = series_browser
                filled_results["os_UA"] = series_os
                filled_results.pop("UA_based", None)

        return filled_results

    def _fill_date_gaps(self, data):
        """ Return the rows of ``data`` with zero-valued entries inserted in the gaps.

        Dates come from ``self._get_date_range``: between ``self.startDate``
        (then each previous row) and every row, and between the last row and
        ``self.endDate``. An empty ``data`` gives an empty list.

        :param data: iterable of ``{"name": date, "value": ...}`` rows
        :return: new list holding the original rows and the inserted entries
        """
        series = []
        previous_date = self.startDate
        has_rows = False

        for row in data:
            has_rows = True
            date = row['name']
            for single_date in self._get_date_range(previous_date, date):
                if single_date != previous_date:
                    series.append({"name": single_date, "value": 0})

            series.append(row)
            previous_date = date

        # Trailing fill, done once after the last row (the original detected
        # the last row by value equality inside the loop).
        if has_rows:
            for single_date in self._get_date_range(previous_date, self.endDate):
                if single_date != previous_date:
                    series.append({"name": single_date, "value": 0})

        return series
Ejemplo n.º 7
0
    def _format_data(self, data, function):
        """ Private method used to format data after an elastic aggregation. Returned formatted data in the same format as other BaseLog instances

        The buckets are read from
        ``data['aggregations'][function][function]['buckets'][0][function]['buckets']``.
        A ``KeyError`` raised while reading the response stops the formatting
        and whatever was collected so far is returned.

        :param data: response of the elasticsearch search
        :param function: function name of current reporting_dicts
        :return: formatted data: a dict mapping each series key to a list of
            ``{"name": date, "value": ...}`` for functions that are both date
            based and multiple series, a list of dicts otherwise
        """
        formatted_data = []

        try:
            date_based = type_of_dict(function, "date_based")
            multiple_series = type_of_dict(function, "multiple_series")

            if date_based and multiple_series:
                # Set before the bucket lookup so that a dict is returned
                # even when the response has no such aggregation.
                formatted_data = {}

            buckets = data['aggregations'][function][function]['buckets'][0][function]['buckets']

            if date_based and multiple_series:
                for row in buckets:
                    date = parse(row['key_as_string'])
                    for sub_row in row[function]['buckets']:
                        if function == 'request_count':
                            value = sub_row['doc_count']
                        else:
                            # A bucket without value counts as 0.
                            value = sub_row['avg_bucket']['value']
                            if value is None:
                                value = 0

                        formatted_data.setdefault(sub_row['key'], []).append(
                            {"name": date, "value": value})

            elif date_based:
                for row in buckets:
                    date = parse(row['key_as_string'])
                    if function.startswith('pf_traffic'):
                        value = row['doc_count']
                    else:
                        # A bucket without value counts as 0.
                        value = row['avg_bucket']['value']
                        if value is None:
                            value = 0

                    formatted_data.append({"name": date, "value": value})

            elif function == 'static_requests':
                for row in buckets:
                    for sub_row in row[function]['buckets']:
                        formatted_data.append({
                            "name": {"uri": sub_row["key"], "app": row['key']},
                            "value": sub_row['doc_count'],
                        })

            elif function == "reputation_tags":
                for row in buckets:
                    ips = [{"ip": sub_row["key"], "value": sub_row['doc_count']}
                           for sub_row in row[function]['buckets']]
                    formatted_data.append(
                        {"name": row['key'], "ips": ips, "value": row['doc_count']})

            elif multiple_series:
                for row in buckets:
                    serie_count = [{"name": sub_row["key"], "value": sub_row['doc_count']}
                                   for sub_row in row[function]['buckets']]
                    formatted_data.append({"name": row['key'], "value": serie_count})

            else:
                for row in buckets:
                    formatted_data.append({"name": row['key'], "value": row['doc_count']})

        except KeyError:
            # Best effort: an incomplete response yields partial (possibly
            # empty) data instead of an error.
            pass

        return formatted_data
Ejemplo n.º 8
0
    def _prepare_filter_search(self, function_dict, function, params):
        """ Private method, prepare filter for aggregation on elasticsearch

        The search body filters the logs ('access': on the application names,
        'packet_filter': on the hostname), restricts them to the
        ``self.startDate`` / ``self.endDate`` range, then applies the
        aggregations of ``function_dict``.

        :param function_dict: elasticsearch definition of the function; its
            'aggs' entry is required, its 'query' entry is optional
        :param function: function name of current reporting_dicts
        :param params: data for search; 'type_logs' is required, plus 'apps'
            (objects with a ``name``) for 'access' logs or 'node' for
            'packet_filter' logs
        :return: dict filter for elasticsearch
        :raises ValueError: if ``params['type_logs']`` is neither 'access' nor
            'packet_filter'
        """
        type_logs = params['type_logs']
        if type_logs == 'access':
            log_filter = {
                "terms": {
                    'app_name': [app.name.replace(' ', '_') for app in params['apps']]
                }
            }
        elif type_logs == 'packet_filter':
            log_filter = {
                "regexp": {
                    "hostname": params['node']
                }
            }
        else:
            # Formerly failed further down with an UnboundLocalError.
            raise ValueError("Unsupported type_logs: {!r}".format(type_logs))

        if type_of_dict(function, "date_based"):
            self._set_date_accuracy()
            # NOTE(review): this writes into the dict received from the
            # caller; if it is a shared report definition the interval
            # persists between calls - confirm this is intended.
            function_dict['aggs'][function]['date_histogram']['interval'] = self.date_accuracy

        search = {
            "aggs": {
                function: {
                    "filter": log_filter,
                    "aggs": {
                        function: {
                            "date_range": {
                                "field": "timestamp",
                                "ranges": {
                                    "from": self.startDate, "to": self.endDate
                                }
                            },
                            "aggs": function_dict['aggs'],
                        }
                    },
                }
            },
            "size": 0
        }

        if 'query' in function_dict:
            search['query'] = function_dict['query']

        return search