def common_values(request):
    """Return the distinct values recorded for the first requested tag.

    Runs a ``terms`` aggregation over ``tags.<name>.values`` on the chosen
    datasource, restricted to the resources (and, when given, namespaces)
    derived from the query string.
    """
    params = request.GET.mixed()
    source = params.pop("datasource", "logs")
    filter_settings = build_filter_settings_from_query_dict(request, params)
    # only the first tag value from the filter decides which field we inspect
    tag = filter_settings["tags"][0]["value"][0]
    # restrict to the applications the caller is allowed to see
    filters = [{"terms": {"resource_id": list(filter_settings["resource"])}}]
    namespaces = filter_settings["namespace"]
    if namespaces:
        filters.append({"terms": {"namespace": namespaces}})
    es_query = {
        "query": {"bool": {"filter": filters}},
        "aggs": {
            "sub_agg": {
                "terms": {"field": "tags.{}.values".format(tag), "size": 50}
            }
        },
    }
    result = Datastores.es.search(
        body=es_query,
        index=es_index_name_limiter(ixtypes=[source]),
        doc_type="log",
        size=0,
    )
    buckets = result["aggregations"]["sub_agg"].get("buckets", [])
    return {"values": [bucket["key"] for bucket in buckets]}
def get_time_series_aggregate(cls, app_ids=None, filter_settings=None):
    """Build an hourly histogram of matching log events.

    Returns the raw ES response carrying the ``events_over_time``
    date-histogram aggregation, ``{}`` when no applications were given,
    and ``[]`` when no log index covers the requested period.
    """
    if not app_ids:
        return {}
    es_query = cls.es_query_builder(app_ids, filter_settings)
    start = filter_settings.get("start_date")
    end = filter_settings.get("end_date")
    # min_doc_count=0 plus extended_bounds pads the histogram with empty
    # buckets so charts cover the whole requested window without gaps
    es_query["aggs"] = {
        "events_over_time": {
            "date_histogram": {
                "field": "timestamp",
                "interval": "1h",
                "min_doc_count": 0,
                "extended_bounds": {"max": end, "min": start},
            }
        }
    }
    log.debug(es_query)
    index_names = es_index_name_limiter(start, end, ixtypes=["logs"])
    if not index_names:
        return []
    return Datastores.es.search(
        body=es_query, index=index_names, doc_type="log", size=0
    )
def get_search_iterator(cls, app_ids=None, page=1, items_per_page=50,
                        order_by=None, filter_settings=None, limit=None):
    """Paginated, newest-first search over log documents.

    Returns a tuple of (hits section of the ES response, count), where
    the count is capped at 5000 so deep pagination is never attempted.
    Returns ``({}, 0)`` when no applications or no matching indices exist.
    """
    if not app_ids:
        return {}, 0
    es_query = cls.es_query_builder(app_ids, filter_settings)
    # newest entries first
    sort_query = {
        "sort": [
            {"timestamp": {"order": "desc"}}
        ]
    }
    es_query.update(sort_query)
    log.debug(es_query)
    es_from = (page - 1) * items_per_page
    index_names = es_index_name_limiter(filter_settings.get('start_date'),
                                        filter_settings.get('end_date'),
                                        ixtypes=['logs'])
    if not index_names:
        return {}, 0
    # NOTE(review): the body is passed positionally and the offset uses the
    # ``es_from`` keyword - this matches a legacy client wrapper; the
    # official elasticsearch-py client expects ``body=`` and ``from_=``
    # (compare the other get_search_iterator variant in this file).
    results = Datastores.es.search(es_query, index=index_names,
                                   doc_type='log', size=items_per_page,
                                   es_from=es_from)
    # cap the reported total so pagination stays within ES window limits
    if results['hits']['total'] > 5000:
        count = 5000
    else:
        count = results['hits']['total']
    return results['hits'], count
def count_by_type(cls, report_type, resource_id, since_when):
    """Count reports of ``report_type`` for one resource since a date.

    Uses a ``value_count`` aggregation over ``tags.group_id.values``
    limited to documents that actually carry a group id; returns 0 when
    no report index partition covers the period.
    """
    report_type = ReportType.key_from_value(report_type)
    # only look at index partitions that can contain the period
    index_names = es_index_name_limiter(start_date=since_when,
                                        ixtypes=['reports'])
    # legacy ES 1.x/2.x DSL: ``filtered`` query with ``and`` filter lists
    es_query = {
        'aggs': {'reports': {'aggs': {
            'sub_agg': {'value_count': {'field': 'tags.group_id.values'}}},
            'filter': {'and': [{'terms': {'resource_id': [resource_id]}},
                               {'exists': {
                                   'field': 'tags.group_id.values'}}]}}},
        'query': {'filtered': {'filter': {
            'and': [{'terms': {'resource_id': [resource_id]}},
                    {'terms': {'tags.type.values': [report_type]}},
                    {'range': {'timestamp': {
                        'gte': since_when}}}]}}}}
    if index_names:
        result = Datastores.es.search(es_query, index=index_names,
                                      doc_type='log', size=0)
        return result['aggregations']['reports']['sub_agg']['value']
    else:
        return 0
def get_time_series_aggregate(cls, app_ids=None, filter_settings=None):
    """Hourly date-histogram of log events (legacy positional-body variant).

    Returns the raw ES response with the ``events_over_time`` aggregation,
    ``{}`` when no applications were given, ``[]`` when no log index
    covers the requested period.
    """
    if not app_ids:
        return {}
    es_query = cls.es_query_builder(app_ids, filter_settings)
    es_query["aggs"] = {
        "events_over_time": {
            "date_histogram": {
                "field": "timestamp",
                "interval": "1h",
                # emit empty buckets so charts have no gaps, padded to the
                # full requested window via extended_bounds
                "min_doc_count": 0,
                'extended_bounds': {
                    'max': filter_settings.get('end_date'),
                    'min': filter_settings.get('start_date')}
            }
        }
    }
    log.debug(es_query)
    index_names = es_index_name_limiter(filter_settings.get('start_date'),
                                        filter_settings.get('end_date'),
                                        ixtypes=['logs'])
    if index_names:
        results = Datastores.es.search(
            es_query, index=index_names, doc_type='log', size=0)
    else:
        results = []
    return results
def common_tags(request):
    """Return distinct tag names and namespaces for the selected resources.

    Runs two ``terms`` aggregations (``tag_list`` then ``namespace``) over
    the datasource picked in the query string, constrained by the date
    range, log level and namespace filters the user supplied.
    """
    config = request.GET.mixed()
    filter_settings = build_filter_settings_from_query_dict(request, config)
    resources = list(filter_settings["resource"])
    # legacy ES 1.x/2.x DSL: ``filtered`` query with an ``and`` filter list
    query = {
        "query": {
            "filtered": {
                "filter": {
                    "and": [{
                        "terms": {
                            "resource_id": list(resources)
                        }
                    }]
                }
            }
        }
    }
    start_date = filter_settings.get('start_date')
    end_date = filter_settings.get('end_date')
    filter_part = query['query']['filtered']['filter']['and']
    # only add the range clause when at least one bound was supplied
    date_range = {"range": {"timestamp": {}}}
    if start_date:
        date_range["range"]["timestamp"]["gte"] = start_date
    if end_date:
        date_range["range"]["timestamp"]["lte"] = end_date
    if start_date or end_date:
        filter_part.append(date_range)
    levels = filter_settings.get('level')
    if levels:
        filter_part.append({"terms": {'log_level': levels}})
    namespaces = filter_settings.get('namespace')
    if namespaces:
        filter_part.append({"terms": {'namespace': namespaces}})
    query["aggs"] = {"sub_agg": {"terms": {"field": "tag_list", "size": 50}}}
    # tags
    index_names = es_index_name_limiter(
        ixtypes=[config.get('datasource', 'logs')])
    result = Datastores.es.search(query, index=index_names, doc_type='log',
                                  size=0)
    tag_buckets = result['aggregations']['sub_agg'].get('buckets', [])
    # namespaces - reuse the same filters, swap only the aggregation field
    query["aggs"] = {"sub_agg": {"terms": {"field": "namespace", "size": 50}}}
    result = Datastores.es.search(query, index=index_names, doc_type='log',
                                  size=0)
    namespaces_buckets = result['aggregations']['sub_agg'].get('buckets', [])
    return {
        "tags": [item['key'] for item in tag_buckets],
        "namespaces": [item['key'] for item in namespaces_buckets]
    }
def common_values(request):
    """Return the values recorded for the first requested tag name.

    Aggregates ``tags.<name>.values`` over the chosen datasource,
    restricted to the selected resources and namespaces (legacy
    ``filtered``/``and`` DSL variant).
    """
    config = request.GET.mixed()
    datasource = config.pop('datasource', 'logs')
    filter_settings = build_filter_settings_from_query_dict(request, config)
    resources = list(filter_settings["resource"])
    # only the first tag value from the filter decides the inspected field
    tag_name = filter_settings['tags'][0]['value'][0]
    # NOTE(review): the namespace terms clause is added even when
    # ``filter_settings['namespace']`` is empty - an empty terms list
    # matches no documents; the newer variant in this file guards this.
    # Confirm whether that is intended here.
    query = {
        'query': {
            'filtered': {
                'filter': {
                    'and': [{
                        'terms': {
                            'resource_id': list(resources)
                        }
                    }, {
                        'terms': {
                            'namespace': filter_settings['namespace']
                        }
                    }]
                }
            }
        }
    }
    query['aggs'] = {
        'sub_agg': {
            'terms': {
                'field': 'tags.{}.values'.format(tag_name),
                'size': 50
            }
        }
    }
    index_names = es_index_name_limiter(ixtypes=[datasource])
    result = Datastores.es.search(query, index=index_names, doc_type='log',
                                  size=0)
    values_buckets = result['aggregations']['sub_agg'].get('buckets', [])
    return {"values": [item['key'] for item in values_buckets]}
def get_search_iterator(
    cls,
    app_ids=None,
    page=1,
    items_per_page=50,
    order_by=None,
    filter_settings=None,
    limit=None,
):
    """Paginated, newest-first search over log documents.

    Returns (hits section of the ES response, count) with the count capped
    at 5000 so deep pagination is never requested; ``({}, 0)`` when no
    applications or no matching indices exist.
    """
    if not app_ids:
        return {}, 0
    es_query = cls.es_query_builder(app_ids, filter_settings)
    # newest entries first
    es_query["sort"] = [{"timestamp": {"order": "desc"}}]
    log.debug(es_query)
    index_names = es_index_name_limiter(
        filter_settings.get("start_date"),
        filter_settings.get("end_date"),
        ixtypes=["logs"],
    )
    if not index_names:
        return {}, 0
    offset = (page - 1) * items_per_page
    results = Datastores.es.search(
        body=es_query,
        index=index_names,
        doc_type="log",
        size=items_per_page,
        from_=offset,
    )
    # cap the reported total so pagination stays within ES window limits
    count = min(results["hits"]["total"], 5000)
    return results["hits"], count
def count_by_type(cls, report_type, resource_id, since_when):
    """Count reports of ``report_type`` for one resource since a date.

    Uses a ``value_count`` aggregation over the group-id tag, limited to
    documents that actually carry a group id; returns 0 when no report
    index partition covers the period.
    """
    report_type = ReportType.key_from_value(report_type)
    # only look at index partitions that can contain the period
    index_names = es_index_name_limiter(start_date=since_when,
                                        ixtypes=["reports"])
    if not index_names:
        return 0
    limit_to_resource = {"terms": {"resource_id": [resource_id]}}
    es_query = {
        "aggs": {
            "reports": {
                "aggs": {
                    "sub_agg": {
                        "value_count": {
                            "field": "tags.group_id.values.keyword"
                        }
                    }
                },
                # count only docs that actually carry a group id
                "filter": {
                    "bool": {
                        "filter": [
                            limit_to_resource,
                            {"exists": {"field": "tags.group_id.values"}},
                        ]
                    }
                },
            }
        },
        "query": {
            "bool": {
                "filter": [
                    limit_to_resource,
                    {"terms": {"tags.type.values": [report_type]}},
                    {"range": {"timestamp": {"gte": since_when}}},
                ]
            }
        },
    }
    result = Datastores.es.search(body=es_query, index=index_names,
                                  doc_type="log", size=0)
    return result["aggregations"]["reports"]["sub_agg"]["value"]
def get_search_iterator(cls, app_ids=None, page=1, items_per_page=50,
                        order_by=None, filter_settings=None, limit=None):
    """Aggregate the newest report per report group for the dashboard list.

    Builds a ``top_groups`` terms aggregation keyed on the report's
    ``_parent`` (its report group) and returns the aggregations section of
    the ES response; ``{}`` when no applications were given, ``[]`` when no
    report index covers the requested period.  Legacy ES 1.x/2.x
    ``filtered``/``and`` DSL variant.
    """
    if not app_ids:
        return {}
    if not filter_settings:
        filter_settings = {}
    query = {
        "size": 0,
        "query": {
            "filtered": {
                "filter": {
                    "and": [{
                        "terms": {
                            "resource_id": list(app_ids)
                        }
                    }]
                }
            }
        },
        "aggs": {
            "top_groups": {
                "terms": {
                    "size": 5000,
                    "field": "_parent",
                    "order": {
                        "newest": "desc"
                    }
                },
                "aggs": {
                    # one representative (latest) report per group
                    "top_reports_hits": {
                        "top_hits": {
                            "size": 1,
                            "sort": {
                                "start_time": "desc"
                            }
                        }
                    },
                    "newest": {
                        "max": {
                            "field": "start_time"
                        }
                    }
                }
            }
        }
    }
    start_date = filter_settings.get('start_date')
    end_date = filter_settings.get('end_date')
    filter_part = query['query']['filtered']['filter']['and']
    date_range = {"range": {"start_time": {}}}
    if start_date:
        date_range["range"]["start_time"]["gte"] = start_date
    if end_date:
        date_range["range"]["start_time"]["lte"] = end_date
    if start_date or end_date:
        filter_part.append(date_range)
    priorities = filter_settings.get('priority')
    # tag filters match directly on the child "report" documents
    for tag in filter_settings.get('tags', []):
        tag_values = [v.lower() for v in tag['value']]
        key = "tags.%s.values" % tag['name'].replace('.', '_')
        filter_part.append({"terms": {key: tag_values}})
    # group-level attributes are filtered via ``has_parent`` queries
    if priorities:
        filter_part.append({
            "has_parent": {
                "parent_type": "report_group",
                "query": {
                    "terms": {
                        'priority': priorities
                    }
                }
            }
        })
    min_occurences = filter_settings.get('min_occurences')
    if min_occurences:
        filter_part.append({
            "has_parent": {
                "parent_type": "report_group",
                "query": {
                    "range": {
                        'occurences': {
                            "gte": min_occurences[0]
                        }
                    }
                }
            }
        })
    min_duration = filter_settings.get('min_duration')
    max_duration = filter_settings.get('max_duration')
    request_ids = filter_settings.get('request_id')
    if request_ids:
        filter_part.append({"terms": {'request_id': request_ids}})
    duration_range = {"range": {"average_duration": {}}}
    if min_duration:
        duration_range["range"]["average_duration"]["gte"] = \
            min_duration[0]
    if max_duration:
        duration_range["range"]["average_duration"]["lte"] = \
            max_duration[0]
    if min_duration or max_duration:
        filter_part.append({
            "has_parent": {
                "parent_type": "report_group",
                "query": duration_range
            }
        })
    http_status = filter_settings.get('http_status')
    report_type = filter_settings.get('report_type', [ReportType.error])
    # set error report type if http status is not found
    # and we are dealing with slow reports
    if not http_status or ReportType.slow in report_type:
        filter_part.append({"terms": {'report_type': report_type}})
    if http_status:
        filter_part.append({"terms": {'http_status': http_status}})
    # free-text / terms conditions go into the query part of ``filtered``;
    # NOTE(review): each branch below overwrites the previous assignment,
    # so only the last supplied condition takes effect - confirm intended
    messages = filter_settings.get('message')
    if messages:
        condition = {'match': {"message": ' '.join(messages)}}
        query['query']['filtered']['query'] = condition
    errors = filter_settings.get('error')
    if errors:
        condition = {'match': {"error": ' '.join(errors)}}
        query['query']['filtered']['query'] = condition
    url_domains = filter_settings.get('url_domain')
    if url_domains:
        condition = {'terms': {"url_domain": url_domains}}
        query['query']['filtered']['query'] = condition
    url_paths = filter_settings.get('url_path')
    if url_paths:
        condition = {'terms': {"url_path": url_paths}}
        query['query']['filtered']['query'] = condition
    # review/public/fixed flags live on the parent report_group document
    if filter_settings.get('report_status'):
        for status in filter_settings.get('report_status'):
            if status == 'never_reviewed':
                filter_part.append({
                    "has_parent": {
                        "parent_type": "report_group",
                        "query": {
                            "term": {
                                "read": False
                            }
                        }
                    }
                })
            elif status == 'reviewed':
                filter_part.append({
                    "has_parent": {
                        "parent_type": "report_group",
                        "query": {
                            "term": {
                                "read": True
                            }
                        }
                    }
                })
            elif status == 'public':
                filter_part.append({
                    "has_parent": {
                        "parent_type": "report_group",
                        "query": {
                            "term": {
                                "public": True
                            }
                        }
                    }
                })
            elif status == 'fixed':
                filter_part.append({
                    "has_parent": {
                        "parent_type": "report_group",
                        "query": {
                            "term": {
                                "fixed": True
                            }
                        }
                    }
                })
    # logging.getLogger('pyelasticsearch').setLevel(logging.DEBUG)
    index_names = es_index_name_limiter(filter_settings.get('start_date'),
                                        filter_settings.get('end_date'),
                                        ixtypes=['reports'])
    if index_names:
        results = Datastores.es.search(query, index=index_names,
                                       doc_type=["report", "report_group"],
                                       size=0)
    else:
        return []
    return results['aggregations']
def get_time_consuming_calls(cls, request, filter_settings, db_session=None):
    """Return the 15 slowest statements (by summed duration) for a resource.

    Two ES round trips: first a terms aggregation on the statement hash
    ordered by total duration, then a top-hits query fetching up to 5
    recent examples per hash.  Example calls are linked back to their
    report groups through the relational ``Report`` table.  Legacy
    ``filtered``/``and`` DSL variant.
    """
    db_session = get_db_session(db_session)
    # get slow calls from older partitions too
    index_names = es_index_name_limiter(
        start_date=filter_settings['start_date'],
        end_date=filter_settings['end_date'],
        ixtypes=['slow_calls'])
    if index_names and filter_settings['resource']:
        # get longest time taking hashes
        es_query = {
            'aggs': {
                'parent_agg': {
                    'aggs': {
                        'duration': {
                            'aggs': {'sub_agg': {
                                'sum': {
                                    'field': 'tags.duration.numeric_values'}
                            }},
                            'filter': {'exists': {
                                'field': 'tags.duration.numeric_values'}}},
                        'total': {
                            'aggs': {'sub_agg': {'value_count': {
                                'field': 'tags.statement_hash.values'}}},
                            'filter': {'exists': {
                                'field': 'tags.statement_hash.values'}}}},
                    'terms': {'field': 'tags.statement_hash.values',
                              'order': {'duration>sub_agg': 'desc'},
                              'size': 15}}},
            'query': {'filtered': {
                'filter': {'and': [
                    {'terms': {
                        'resource_id': [filter_settings['resource'][0]]
                    }},
                    {'range': {'timestamp': {
                        'gte': filter_settings['start_date'],
                        'lte': filter_settings['end_date']}
                    }}]
                }
            }
            }
        }
        result = Datastores.es.search(
            es_query, index=index_names, doc_type='log', size=0)
        results = result['aggregations']['parent_agg']['buckets']
    else:
        return []
    hashes = [i['key'] for i in results]
    # get queries associated with hashes
    calls_query = {
        "aggs": {
            "top_calls": {
                "terms": {
                    "field": "tags.statement_hash.values",
                    "size": 15
                },
                "aggs": {
                    "top_calls_hits": {
                        "top_hits": {
                            "sort": {"timestamp": "desc"},
                            "size": 5
                        }
                    }
                }
            }
        },
        "query": {
            "filtered": {
                "filter": {
                    "and": [
                        {
                            "terms": {
                                "resource_id": [
                                    filter_settings['resource'][0]
                                ]
                            }
                        },
                        {
                            "terms": {
                                "tags.statement_hash.values": hashes
                            }
                        },
                        {
                            "range": {
                                "timestamp": {
                                    "gte": filter_settings['start_date'],
                                    "lte": filter_settings['end_date']
                                }
                            }
                        }
                    ]
                }
            }
        }
    }
    calls = Datastores.es.search(calls_query, index=index_names,
                                 doc_type='log', size=0)
    call_results = {}
    report_ids = []
    for call in calls['aggregations']['top_calls']['buckets']:
        hits = call['top_calls_hits']['hits']['hits']
        call_results[call['key']] = [i['_source'] for i in hits]
        report_ids.extend([i['_source']['tags']['report_id']['values']
                           for i in hits])
    # map example report ids back to their group ids via the database
    if report_ids:
        r_query = db_session.query(Report.group_id, Report.id)
        r_query = r_query.filter(Report.id.in_(report_ids))
        r_query = r_query.filter(
            Report.start_time >= filter_settings['start_date'])
    else:
        r_query = []
    reports_reversed = {}
    for report in r_query:
        reports_reversed[report.id] = report.group_id
    final_results = []
    for item in results:
        if item['key'] not in call_results:
            continue
        call = call_results[item['key']][0]
        row = {'occurences': item['total']['sub_agg']['value'],
               'total_duration': round(
                   item['duration']['sub_agg']['value']),
               'statement': call['message'],
               'statement_type': call['tags']['type']['values'],
               'statement_subtype': call['tags']['subtype']['values'],
               'statement_hash': item['key'],
               'latest_details': []}
        # NOTE(review): ' remote' carries a leading space - looks like a
        # typo for 'remote'; confirm against the values actually indexed
        # in tags.type.values before changing it
        if row['statement_type'] in ['tmpl', ' remote']:
            params = call['tags']['parameters']['values'] \
                if 'parameters' in call['tags'] else ''
            row['statement'] = '{} ({})'.format(call['message'], params)
        for call in call_results[item['key']]:
            report_id = call['tags']['report_id']['values']
            group_id = reports_reversed.get(report_id)
            if group_id:
                row['latest_details'].append(
                    {'group_id': group_id, 'report_id': report_id})
        final_results.append(row)
    return final_results
def get_trending(cls, request, filter_settings, limit=15, db_session=None):
    """
    Returns report groups trending for specific time interval

    Counts occurrences per report group id in ES, then loads the matching
    ``ReportGroup`` rows from the database and returns
    ``[(occurrence_count, group), ...]`` sorted by count, descending.
    """
    db_session = get_db_session(db_session)
    # optional per-tag restrictions supplied by the caller
    tags = []
    if filter_settings.get('tags'):
        for tag in filter_settings['tags']:
            tags.append({
                'terms': {
                    'tags.{}.values'.format(tag['name']): tag['value']
                }
            })
    index_names = es_index_name_limiter(
        start_date=filter_settings['start_date'],
        end_date=filter_settings['end_date'],
        ixtypes=['reports'])
    if not index_names or not filter_settings['resource']:
        return []
    # count occurrences per group id (legacy filtered/and DSL)
    es_query = {
        'aggs': {
            'parent_agg': {
                'aggs': {
                    'groups': {
                        'aggs': {
                            'sub_agg': {
                                'value_count': {
                                    'field': 'tags.group_id.values'
                                }
                            }
                        },
                        'filter': {
                            'exists': {
                                'field': 'tags.group_id.values'
                            }
                        }
                    }
                },
                'terms': {
                    'field': 'tags.group_id.values',
                    'size': limit
                }
            }
        },
        'query': {
            'filtered': {
                'filter': {
                    'and': [{
                        'terms': {
                            'resource_id': [filter_settings['resource'][0]]
                        }
                    }, {
                        'range': {
                            'timestamp': {
                                'gte': filter_settings['start_date'],
                                'lte': filter_settings['end_date']
                            }
                        }
                    }]
                }
            }
        }
    }
    if tags:
        es_query['query']['filtered']['filter']['and'].extend(tags)
    result = Datastores.es.search(es_query, index=index_names,
                                  doc_type='log', size=0)
    series = []
    for bucket in result['aggregations']['parent_agg']['buckets']:
        series.append({
            'key': bucket['key'],
            'groups': bucket['groups']['sub_agg']['value']
        })
    # group id -> occurrence count, keys normalized to int for DB lookup
    report_groups_d = {}
    for g in series:
        report_groups_d[int(g['key'])] = g['groups'] or 0
    query = db_session.query(ReportGroup)
    query = query.filter(ReportGroup.id.in_(list(report_groups_d.keys())))
    query = query.options(sa.orm.joinedload(ReportGroup.last_report_ref))
    results = [(
        report_groups_d[group.id],
        group,
    ) for group in query]
    return sorted(results, reverse=True, key=lambda x: x[0])
def get_report_stats(cls, request, filter_settings):
    """
    Gets report dashboard graphs
    Returns information for BAR charts with occurences/interval information
    detailed means version that returns time intervals - non detailed
    returns total sum
    """
    # pick histogram resolution based on the width of the requested window
    delta = filter_settings['end_date'] - filter_settings['start_date']
    if delta < h.time_deltas.get('12h')['delta']:
        interval = '1m'
    elif delta <= h.time_deltas.get('3d')['delta']:
        interval = '5m'
    elif delta >= h.time_deltas.get('2w')['delta']:
        interval = '24h'
    else:
        interval = '1h'
    group_id = filter_settings.get('group_id')
    # date-histogram buckets split per report type via the nested
    # ``types`` sub-aggregation (legacy filtered/and DSL)
    es_query = {
        'aggs': {
            'parent_agg': {
                'aggs': {
                    'types': {
                        'aggs': {
                            'sub_agg': {
                                'terms': {
                                    'field': 'tags.type.values'
                                }
                            }
                        },
                        'filter': {
                            'and': [{
                                'exists': {
                                    'field': 'tags.type.values'
                                }
                            }]
                        }
                    }
                },
                'date_histogram': {
                    'extended_bounds': {
                        'max': filter_settings['end_date'],
                        'min': filter_settings['start_date']
                    },
                    'field': 'timestamp',
                    'interval': interval,
                    'min_doc_count': 0
                }
            }
        },
        'query': {
            'filtered': {
                'filter': {
                    'and': [{
                        'terms': {
                            'resource_id': [filter_settings['resource'][0]]
                        }
                    }, {
                        'range': {
                            'timestamp': {
                                'gte': filter_settings['start_date'],
                                'lte': filter_settings['end_date']
                            }
                        }
                    }]
                }
            }
        }
    }
    # narrow the per-type sub-aggregation to a single report group
    if group_id:
        parent_agg = es_query['aggs']['parent_agg']
        filters = parent_agg['aggs']['types']['filter']['and']
        filters.append({'terms': {'tags.group_id.values': [group_id]}})
    index_names = es_index_name_limiter(
        start_date=filter_settings['start_date'],
        end_date=filter_settings['end_date'],
        ixtypes=['reports'])
    if not index_names:
        return []
    result = Datastores.es.search(es_query, index=index_names,
                                  doc_type='log', size=0)
    series = []
    for bucket in result['aggregations']['parent_agg']['buckets']:
        # histogram keys are epoch milliseconds
        point = {
            'x': datetime.utcfromtimestamp(int(bucket['key']) / 1000),
            'report': 0,
            'not_found': 0,
            'slow_report': 0
        }
        for subbucket in bucket['types']['sub_agg']['buckets']:
            if subbucket['key'] == 'slow':
                point['slow_report'] = subbucket['doc_count']
            elif subbucket['key'] == 'error':
                point['report'] = subbucket['doc_count']
            elif subbucket['key'] == 'not_found':
                point['not_found'] = subbucket['doc_count']
        series.append(point)
    return series
def common_tags(request):
    """Return distinct tag names and namespaces for the selected resources.

    Runs two ``terms`` aggregations (``tag_list.keyword`` then
    ``namespace.keyword``) over the datasource picked in the query string,
    constrained by the date range, log level and namespace filters the
    user supplied.
    """
    config = request.GET.mixed()
    filter_settings = build_filter_settings_from_query_dict(request, config)
    resources = list(filter_settings["resource"])
    query = {
        "query": {
            "bool": {
                "filter": [{
                    "terms": {
                        "resource_id": list(resources)
                    }
                }]
            }
        }
    }
    start_date = filter_settings.get("start_date")
    end_date = filter_settings.get("end_date")
    filter_part = query["query"]["bool"]["filter"]
    # only add the range clause when at least one bound was supplied
    date_range = {"range": {"timestamp": {}}}
    if start_date:
        date_range["range"]["timestamp"]["gte"] = start_date
    if end_date:
        date_range["range"]["timestamp"]["lte"] = end_date
    if start_date or end_date:
        filter_part.append(date_range)
    levels = filter_settings.get("level")
    if levels:
        filter_part.append({"terms": {"log_level": levels}})
    namespaces = filter_settings.get("namespace")
    if namespaces:
        filter_part.append({"terms": {"namespace": namespaces}})
    query["aggs"] = {
        "sub_agg": {
            "terms": {
                "field": "tag_list.keyword",
                "size": 50
            }
        }
    }
    # tags
    index_names = es_index_name_limiter(
        ixtypes=[config.get("datasource", "logs")])
    result = Datastores.es.search(body=query, index=index_names,
                                  doc_type="log", size=0)
    tag_buckets = result["aggregations"]["sub_agg"].get("buckets", [])
    # namespaces - reuse the same filters, swap only the aggregation field
    query["aggs"] = {
        "sub_agg": {
            "terms": {
                "field": "namespace.keyword",
                "size": 50
            }
        }
    }
    result = Datastores.es.search(body=query, index=index_names,
                                  doc_type="log", size=0)
    namespaces_buckets = result["aggregations"]["sub_agg"].get("buckets", [])
    return {
        "tags": [item["key"] for item in tag_buckets],
        "namespaces": [item["key"] for item in namespaces_buckets],
    }
def get_time_consuming_calls(cls, request, filter_settings, db_session=None):
    """Return the 15 slowest statements (by summed duration) for a resource.

    New ``bool``-query variant.  Two ES round trips: first a terms
    aggregation on the statement hash ordered by total duration, then a
    top-hits query fetching up to 5 recent examples per hash.  Example
    calls are linked back to their report groups via the relational
    ``Report`` table.
    """
    db_session = get_db_session(db_session)
    # get slow calls from older partitions too
    index_names = es_index_name_limiter(
        start_date=filter_settings["start_date"],
        end_date=filter_settings["end_date"],
        ixtypes=["slow_calls"],
    )
    if index_names and filter_settings["resource"]:
        # get longest time taking hashes
        es_query = {
            "aggs": {
                "parent_agg": {
                    "aggs": {
                        "duration": {
                            "aggs": {
                                "sub_agg": {
                                    "sum": {
                                        "field": "tags.duration.numeric_values"
                                    }
                                }
                            },
                            "filter": {
                                "exists": {
                                    "field": "tags.duration.numeric_values"
                                }
                            },
                        },
                        "total": {
                            "aggs": {
                                "sub_agg": {
                                    "value_count": {
                                        "field": "tags.statement_hash.values.keyword"
                                    }
                                }
                            },
                            "filter": {
                                "exists": {
                                    "field": "tags.statement_hash.values"
                                }
                            },
                        },
                    },
                    "terms": {
                        "field": "tags.statement_hash.values.keyword",
                        "order": {
                            "duration>sub_agg": "desc"
                        },
                        "size": 15,
                    },
                }
            },
            "query": {
                "bool": {
                    "filter": [
                        {
                            "terms": {
                                "resource_id": [filter_settings["resource"][0]]
                            }
                        },
                        {
                            "range": {
                                "timestamp": {
                                    "gte": filter_settings["start_date"],
                                    "lte": filter_settings["end_date"],
                                }
                            }
                        },
                    ]
                }
            },
        }
        result = Datastores.es.search(body=es_query, index=index_names,
                                      doc_type="log", size=0)
        results = result["aggregations"]["parent_agg"]["buckets"]
    else:
        return []
    hashes = [i["key"] for i in results]
    # get queries associated with hashes
    calls_query = {
        "aggs": {
            "top_calls": {
                "terms": {
                    "field": "tags.statement_hash.values.keyword",
                    "size": 15,
                },
                "aggs": {
                    "top_calls_hits": {
                        "top_hits": {
                            "sort": {
                                "timestamp": "desc"
                            },
                            "size": 5
                        }
                    }
                },
            }
        },
        "query": {
            "bool": {
                "filter": [
                    {
                        "terms": {
                            "resource_id": [filter_settings["resource"][0]]
                        }
                    },
                    {
                        "terms": {
                            "tags.statement_hash.values": hashes
                        }
                    },
                    {
                        "range": {
                            "timestamp": {
                                "gte": filter_settings["start_date"],
                                "lte": filter_settings["end_date"],
                            }
                        }
                    },
                ]
            }
        },
    }
    calls = Datastores.es.search(body=calls_query, index=index_names,
                                 doc_type="log", size=0)
    call_results = {}
    report_ids = []
    for call in calls["aggregations"]["top_calls"]["buckets"]:
        hits = call["top_calls_hits"]["hits"]["hits"]
        call_results[call["key"]] = [i["_source"] for i in hits]
        report_ids.extend(
            [i["_source"]["tags"]["report_id"]["values"] for i in hits])
    # map example report ids back to their group ids via the database
    if report_ids:
        r_query = db_session.query(Report.group_id, Report.id)
        r_query = r_query.filter(Report.id.in_(report_ids))
        r_query = r_query.filter(
            Report.start_time >= filter_settings["start_date"])
    else:
        r_query = []
    reports_reversed = {}
    for report in r_query:
        reports_reversed[report.id] = report.group_id
    final_results = []
    for item in results:
        if item["key"] not in call_results:
            continue
        call = call_results[item["key"]][0]
        row = {
            "occurences": item["total"]["sub_agg"]["value"],
            "total_duration": round(item["duration"]["sub_agg"]["value"]),
            "statement": call["message"],
            "statement_type": call["tags"]["type"]["values"],
            "statement_subtype": call["tags"]["subtype"]["values"],
            "statement_hash": item["key"],
            "latest_details": [],
        }
        # NOTE(review): " remote" carries a leading space - looks like a
        # typo for "remote"; confirm against the values actually indexed
        # in tags.type.values before changing it
        if row["statement_type"] in ["tmpl", " remote"]:
            params = (call["tags"]["parameters"]["values"]
                      if "parameters" in call["tags"] else "")
            row["statement"] = "{} ({})".format(call["message"], params)
        for call in call_results[item["key"]]:
            report_id = call["tags"]["report_id"]["values"]
            group_id = reports_reversed.get(report_id)
            if group_id:
                row["latest_details"].append({
                    "group_id": group_id,
                    "report_id": report_id
                })
        final_results.append(row)
    return final_results
def get_search_iterator(
    cls,
    app_ids=None,
    page=1,
    items_per_page=50,
    order_by=None,
    filter_settings=None,
    limit=None,
):
    """Aggregate the newest report per report group for the dashboard list.

    New ``bool``-query variant using the ES 6+ ``join_field``.  Returns the
    aggregations section of the ES response; ``{}`` when no applications
    were given, ``[]`` when no report index covers the requested period.
    """
    if not app_ids:
        return {}
    if not filter_settings:
        filter_settings = {}
    # exact-value restrictions go under "filter", full-text under "must"
    query = {
        "size": 0,
        "query": {
            "bool": {
                "must": [],
                "should": [],
                "filter": [{"terms": {"resource_id": list(app_ids)}}],
            }
        },
        "aggs": {
            "top_groups": {
                "terms": {
                    "size": 5000,
                    "field": "join_field#report_group",
                    "order": {"newest": "desc"},
                },
                "aggs": {
                    # one representative (latest) report per group
                    "top_reports_hits": {
                        "top_hits": {"size": 1, "sort": {"start_time": "desc"}}
                    },
                    "newest": {"max": {"field": "start_time"}},
                },
            }
        },
    }
    start_date = filter_settings.get("start_date")
    end_date = filter_settings.get("end_date")
    filter_part = query["query"]["bool"]["filter"]
    date_range = {"range": {"start_time": {}}}
    if start_date:
        date_range["range"]["start_time"]["gte"] = start_date
    if end_date:
        date_range["range"]["start_time"]["lte"] = end_date
    if start_date or end_date:
        filter_part.append(date_range)
    priorities = filter_settings.get("priority")
    # tag filters match directly on the child "report" documents
    for tag in filter_settings.get("tags", []):
        tag_values = [v.lower() for v in tag["value"]]
        key = "tags.%s.values" % tag["name"].replace(".", "_")
        filter_part.append({"terms": {key: tag_values}})
    # group-level attributes are filtered via ``has_parent`` queries
    if priorities:
        filter_part.append(
            {
                "has_parent": {
                    "parent_type": "report_group",
                    "query": {"terms": {"priority": priorities}},
                }
            }
        )
    min_occurences = filter_settings.get("min_occurences")
    if min_occurences:
        filter_part.append(
            {
                "has_parent": {
                    "parent_type": "report_group",
                    "query": {"range": {"occurences": {"gte": min_occurences[0]}}},
                }
            }
        )
    min_duration = filter_settings.get("min_duration")
    max_duration = filter_settings.get("max_duration")
    request_ids = filter_settings.get("request_id")
    if request_ids:
        filter_part.append({"terms": {"request_id": request_ids}})
    duration_range = {"range": {"average_duration": {}}}
    if min_duration:
        duration_range["range"]["average_duration"]["gte"] = min_duration[0]
    if max_duration:
        duration_range["range"]["average_duration"]["lte"] = max_duration[0]
    if min_duration or max_duration:
        filter_part.append(
            {"has_parent": {"parent_type": "report_group", "query": duration_range}}
        )
    http_status = filter_settings.get("http_status")
    report_type = filter_settings.get("report_type", [ReportType.error])
    # set error report type if http status is not found
    # and we are dealing with slow reports
    if not http_status or ReportType.slow in report_type:
        filter_part.append({"terms": {"report_type": report_type}})
    if http_status:
        filter_part.append({"terms": {"http_status": http_status}})
    messages = filter_settings.get("message")
    if messages:
        condition = {"match": {"message": " ".join(messages)}}
        query["query"]["bool"]["must"].append(condition)
    errors = filter_settings.get("error")
    if errors:
        condition = {"match": {"error": " ".join(errors)}}
        query["query"]["bool"]["must"].append(condition)
    url_domains = filter_settings.get("url_domain")
    if url_domains:
        condition = {"terms": {"url_domain": url_domains}}
        query["query"]["bool"]["must"].append(condition)
    url_paths = filter_settings.get("url_path")
    if url_paths:
        condition = {"terms": {"url_path": url_paths}}
        query["query"]["bool"]["must"].append(condition)
    # review/public/fixed flags live on the parent report_group document
    if filter_settings.get("report_status"):
        for status in filter_settings.get("report_status"):
            if status == "never_reviewed":
                filter_part.append(
                    {
                        "has_parent": {
                            "parent_type": "report_group",
                            "query": {"term": {"read": False}},
                        }
                    }
                )
            elif status == "reviewed":
                filter_part.append(
                    {
                        "has_parent": {
                            "parent_type": "report_group",
                            "query": {"term": {"read": True}},
                        }
                    }
                )
            elif status == "public":
                filter_part.append(
                    {
                        "has_parent": {
                            "parent_type": "report_group",
                            "query": {"term": {"public": True}},
                        }
                    }
                )
            elif status == "fixed":
                filter_part.append(
                    {
                        "has_parent": {
                            "parent_type": "report_group",
                            "query": {"term": {"fixed": True}},
                        }
                    }
                )
    # logging.getLogger('pyelasticsearch').setLevel(logging.DEBUG)
    index_names = es_index_name_limiter(
        filter_settings.get("start_date"),
        filter_settings.get("end_date"),
        ixtypes=["reports"],
    )
    if index_names:
        results = Datastores.es.search(
            body=query,
            index=index_names,
            doc_type=["report", "report_group"],
            size=0,
        )
    else:
        return []
    return results["aggregations"]
def transform_json_to_es_config(
    request, json_body, filter_settings, ids_to_override=None, ixtypes=None
):
    """
    Generates a valid elasticsearch query our of json body and
    filter settings

    :param request: request object
    :param json_body: config object generated by angular
    :param filter_settings: list of search params restricting main
        resultset WARNING - this restricts the resultset to data that
        user has permission to see so its critical to be present
    :param ids_to_override: allows to override the application list that
        user normally would be able to access - this allows to provide
        public dashboards functionality
    :return: {"query": query,
            "parent_agg": parent_agg,
            "aggregations": aggregations,
            "index_names": index_names}

    NOTE: mutates ``filter_settings`` (fills in start/end dates) and
    ``parent_agg`` (adds ``field_name``) in place.
    """
    # determine start and end date for dataset
    bdry_start_date, bdry_end_date = determine_date_boundries_json(json_body)
    # in some cases we have explicitly set start and end dates in filter
    # settings then we use that instead of "relative" config range prediction
    if filter_settings.get("start_date"):
        bdry_start_date = filter_settings["start_date"]
    if filter_settings.get("end_date"):
        bdry_end_date = filter_settings["end_date"]
    # inject the dates if missing from filter settings (important for charting)
    if not filter_settings.get("start_date"):
        filter_settings["start_date"] = bdry_start_date
    if not filter_settings.get("end_date"):
        filter_settings["end_date"] = bdry_end_date
    parent_agg = json_body.get("parentAgg")
    # a 1-minute histogram over a long window would produce too many
    # buckets - clamp the range to the last 4 hours
    if (
        parent_agg
        and parent_agg["type"] == "time_histogram"
        and parent_agg["config"]["interval"] == "1m"
    ):
        delta = filter_settings["end_date"] - filter_settings["start_date"]
        if delta > timedelta(hours=4):
            bdry_start_date = filter_settings["end_date"] - timedelta(hours=4)
            filter_settings["start_date"] = bdry_start_date
    query = build_query(filter_settings)
    # AGG part
    aggregations = json_body.get("aggs")
    sub_agg = {}
    for agg in aggregations:
        subagg_id, parsed_agg = process_agg(
            request, agg, override_app_ids=ids_to_override
        )
        sub_agg[subagg_id] = parsed_agg
        # handle ordering on sub aggregations
        if parent_agg:
            order_by = parent_agg["config"].get("order_by")
        else:
            order_by = None
        # ordering is applied on the sub aggregation itself only when no
        # (typed) parent aggregation will wrap it
        if (not parent_agg or not parent_agg["type"]) and order_by:
            if str(subagg_id) != order_by["agg"]:
                continue
            if parsed_agg["aggs"].get("sub_agg", {}).get("aggs"):
                parsed_agg["aggs"]["sub_agg"][agg["type"]]["order"] = {
                    "deep_agg": order_by["order"]
                }
            else:
                parsed_agg["aggs"]["sub_agg"][agg["type"]]["order"] = {
                    "_count": order_by["order"]
                }
    # input data normalization
    # will be needed for label generation
    if parent_agg and parent_agg["type"] and parent_agg["type"] != "time_histogram":
        parent_agg["field_name"] = fix_dot(parent_agg["config"].get("field"))
    if parent_agg and parent_agg["type"] == "time_histogram":
        parent_agg["field_name"] = "timestamp"
        parent_agg["config"]["field"] = "timestamp"
        query["aggs"] = {
            "parent_agg": {
                "date_histogram": {
                    "field": "timestamp",
                    "interval": parent_agg["config"]["interval"],
                    "min_doc_count": 0,
                    "extended_bounds": {"max": bdry_end_date},
                },
                "aggs": sub_agg,
            }
        }
        date_histogram = query["aggs"]["parent_agg"]["date_histogram"]
        # never extend the histogram before the permitted filter window
        if bdry_start_date < filter_settings["start_date"]:
            date_histogram["extended_bounds"]["min"] = filter_settings["start_date"]
        else:
            date_histogram["extended_bounds"]["min"] = bdry_start_date
    elif parent_agg and parent_agg["type"]:
        # generic parent aggregation (e.g. terms) wrapping the sub aggs
        query["aggs"] = {"parent_agg": {parent_agg["type"]: {}, "aggs": sub_agg}}
        if parent_agg["config"].get("size"):
            size = int(parent_agg["config"]["size"])
            query["aggs"]["parent_agg"][parent_agg["type"]]["size"] = size
        # handle ordering of nested metrics
        if parent_agg["config"].get("order_by"):
            metric = "%s>sub_agg" % parent_agg["config"]["order_by"]["agg"]
            query["aggs"]["parent_agg"][parent_agg["type"]]["order"] = {
                metric: parent_agg["config"]["order_by"]["order"]
            }
        key = "tags.%s.values" % fix_dot(parent_agg["config"].get("field"))
        query["aggs"]["parent_agg"][parent_agg["type"]]["field"] = key
    else:
        query["aggs"] = sub_agg

    def float_test(input):
        # True for field names that are NOT numeric literals - numeric
        # literals need no mapping lookup
        try:
            float(input)
            return False
        except ValueError:
            return True

    tags_to_check = set()
    # check if we need to check indices for fields being present
    for agg in aggregations:
        computed_fields = agg.get("computed_fields", [])
        filtered_fields = list(
            filter(float_test, [f["field"] for f in computed_fields if f["field"]])
        )
        if agg.get("computed"):
            tags_to_check = tags_to_check.union(set(filtered_fields))
        if agg.get("deepAggEnabled"):
            computed_fields = agg["deepAgg"].get("computed_fields", [])
            filtered_fields = list(
                filter(float_test, [f["field"] for f in computed_fields if f["field"]])
            )
            tags_to_check = tags_to_check.union(set(filtered_fields))
    if not ixtypes:
        ixtypes = [json_body.get("datasource", "logs")]
    index_names = es_index_name_limiter(
        start_date=bdry_start_date, end_date=bdry_end_date, ixtypes=ixtypes
    )
    # exclude indices that don't have tags that we use in expression
    if tags_to_check:
        exclude_indices = []
        tags_to_check = set(map(fix_dot, tags_to_check))
        if index_names:
            mappings = request.es_conn.get_mapping(index=index_names, doc_type="log")
        else:
            mappings = {}
        for k, v in mappings.items():
            tags = v["mappings"]["log"]["properties"].get("tags", {"properties": {}})
            if "properties" not in tags:
                exclude_indices.append(k)
                continue
            mapped_keys = list(tags["properties"].keys())
            if not tags_to_check.issubset(mapped_keys):
                exclude_indices.append(k)
        index_names = set(index_names).difference(exclude_indices)
    return {
        "query": query,
        "parent_agg": parent_agg,
        "aggregations": aggregations,
        "index_names": index_names,
    }
def uptime_for_resource(cls, request, filter_settings):
    """Return uptime chart data (avg response time per bucket) for one resource.

    Builds a ``date_histogram`` aggregation over the "uptime" indices for the
    requested time window and averages ``tags.response_time.numeric_values``
    inside each bucket.  The result is a chart-shaped dict whose ``series``
    holds one ``{"x": <datetime>, "response_time": <float>}`` point per bucket.
    Returns the empty chart skeleton when no matching indices exist.
    """
    window_start = filter_settings["start_date"]
    window_end = filter_settings["end_date"]
    span = window_end - window_start

    # Histogram resolution scales with the size of the queried window.
    if span < h.time_deltas.get("12h")["delta"]:
        interval = "1m"
    elif span <= h.time_deltas.get("3d")["delta"]:
        interval = "5m"
    elif span >= h.time_deltas.get("2w")["delta"]:
        interval = "24h"
    else:
        interval = "1h"

    chart_config = {
        "parentAgg": {"config": {"interval": interval}, "type": "time_histogram"},
        "aggs": [
            {
                "config": {"field": "response_time", "label": "requests"},
                "type": "avg",
                "id": "response_time",
            }
        ],
    }

    index_names = es_index_name_limiter(
        start_date=window_start,
        end_date=window_end,
        ixtypes=["uptime"],
    )

    chart = {
        "name": "metrics",
        "chart_type": chart_config.get("chartType"),
        "parent_agg": chart_config["parentAgg"],
        "series": [],
        "system_labels": {},
        "groups": [],
        "rect_regions": [],
        "categories": [],
    }
    if not index_names:
        # Nothing to query — hand back the empty skeleton.
        return chart

    resource_filter = {
        "terms": {"resource_id": [filter_settings["resource"][0]]}
    }
    time_filter = {
        "range": {
            "timestamp": {"gte": window_start, "lte": window_end}
        }
    }
    # Only buckets where a response_time tag exists contribute to the average.
    response_time_agg = {
        "filter": {
            "bool": {
                "filter": [
                    {"exists": {"field": "tags.response_time.numeric_values"}}
                ]
            }
        },
        "aggs": {
            "sub_agg": {
                "avg": {"field": "tags.response_time.numeric_values"}
            }
        },
    }
    es_query = {
        "query": {"bool": {"filter": [resource_filter, time_filter]}},
        "aggs": {
            "parent_agg": {
                "date_histogram": {
                    "field": "timestamp",
                    "interval": interval,
                    "extended_bounds": {
                        "max": window_end,
                        "min": window_start,
                    },
                    "min_doc_count": 0,
                },
                "aggs": {"response_time": response_time_agg},
            }
        },
    }

    result = request.es_conn.search(
        body=es_query, index=index_names, doc_type="log", size=0
    )

    series = []
    for bucket in result["aggregations"]["parent_agg"]["buckets"]:
        avg_value = bucket["response_time"]["sub_agg"]["value"]
        series.append(
            {
                # ES returns the bucket key as epoch milliseconds.
                "x": datetime.utcfromtimestamp(int(bucket["key"]) / 1000),
                # Empty buckets yield a falsy value — chart wants a 0 there.
                "response_time": round(avg_value, 3) if avg_value else 0,
            }
        )
    chart["series"] = series
    return chart
def get_report_stats(cls, request, filter_settings):
    """Return per-interval report counts for the dashboard BAR charts.

    Runs a ``date_histogram`` over the "reports" indices, sub-aggregated by
    ``tags.type.values``, and maps each bucket into a point of the form
    ``{"x": <datetime>, "report": n, "not_found": n, "slow_report": n}``.
    When ``filter_settings["group_id"]`` is set, counts are restricted to
    that report group.  Returns ``[]`` when no matching indices exist.
    """
    window_start = filter_settings["start_date"]
    window_end = filter_settings["end_date"]
    span = window_end - window_start

    # Histogram resolution scales with the size of the queried window.
    if span < h.time_deltas.get("12h")["delta"]:
        interval = "1m"
    elif span <= h.time_deltas.get("3d")["delta"]:
        interval = "5m"
    elif span >= h.time_deltas.get("2w")["delta"]:
        interval = "24h"
    else:
        interval = "1h"

    group_id = filter_settings.get("group_id")

    # Sub-aggregation: count documents per report type within each bucket;
    # only documents that actually carry a type tag are considered.
    type_filters = [{"exists": {"field": "tags.type.values"}}]
    if group_id:
        type_filters.append({"terms": {"tags.group_id.values": [group_id]}})
    types_agg = {
        "aggs": {
            "sub_agg": {"terms": {"field": "tags.type.values.keyword"}}
        },
        "filter": {"bool": {"filter": type_filters}},
    }

    es_query = {
        "aggs": {
            "parent_agg": {
                "aggs": {"types": types_agg},
                "date_histogram": {
                    "extended_bounds": {
                        "max": window_end,
                        "min": window_start,
                    },
                    "field": "timestamp",
                    "interval": interval,
                    "min_doc_count": 0,
                },
            }
        },
        "query": {
            "bool": {
                "filter": [
                    {"terms": {"resource_id": [filter_settings["resource"][0]]}},
                    {
                        "range": {
                            "timestamp": {
                                "gte": window_start,
                                "lte": window_end,
                            }
                        }
                    },
                ]
            }
        },
    }

    index_names = es_index_name_limiter(
        start_date=window_start,
        end_date=window_end,
        ixtypes=["reports"],
    )
    if not index_names:
        return []

    result = Datastores.es.search(
        body=es_query, index=index_names, doc_type="log", size=0
    )

    # Translate ES type-bucket keys into the field names the chart expects;
    # unknown keys are ignored.
    key_to_field = {
        "slow": "slow_report",
        "error": "report",
        "not_found": "not_found",
    }
    series = []
    for bucket in result["aggregations"]["parent_agg"]["buckets"]:
        point = {
            # ES returns the bucket key as epoch milliseconds.
            "x": datetime.utcfromtimestamp(int(bucket["key"]) / 1000),
            "report": 0,
            "not_found": 0,
            "slow_report": 0,
        }
        for type_bucket in bucket["types"]["sub_agg"]["buckets"]:
            field = key_to_field.get(type_bucket["key"])
            if field:
                point[field] = type_bucket["doc_count"]
        series.append(point)
    return series
def get_trending(cls, request, filter_settings, limit=15, db_session=None): """ Returns report groups trending for specific time interval """ db_session = get_db_session(db_session) tags = [] if filter_settings.get("tags"): for tag in filter_settings["tags"]: tags.append( {"terms": {"tags.{}.values".format(tag["name"]): tag["value"]}} ) index_names = es_index_name_limiter( start_date=filter_settings["start_date"], end_date=filter_settings["end_date"], ixtypes=["reports"], ) if not index_names or not filter_settings["resource"]: return [] es_query = { "aggs": { "parent_agg": { "aggs": { "groups": { "aggs": { "sub_agg": { "value_count": { "field": "tags.group_id.values.keyword" } } }, "filter": {"exists": {"field": "tags.group_id.values"}}, } }, "terms": {"field": "tags.group_id.values.keyword", "size": limit}, } }, "query": { "bool": { "filter": [ {"terms": {"resource_id": [filter_settings["resource"][0]]}}, { "range": { "timestamp": { "gte": filter_settings["start_date"], "lte": filter_settings["end_date"], } } }, ] } }, } if tags: es_query["query"]["bool"]["filter"].extend(tags) result = Datastores.es.search( body=es_query, index=index_names, doc_type="report", size=0 ) series = [] for bucket in result["aggregations"]["parent_agg"]["buckets"]: series.append( {"key": bucket["key"], "groups": bucket["groups"]["sub_agg"]["value"]} ) report_groups_d = {} for g in series: report_groups_d[int(g["key"])] = g["groups"] or 0 query = db_session.query(ReportGroup) query = query.filter(ReportGroup.id.in_(list(report_groups_d.keys()))) query = query.options(sa.orm.joinedload(ReportGroup.last_report_ref)) results = [(report_groups_d[group.id], group) for group in query] return sorted(results, reverse=True, key=lambda x: x[0])