from opensearchpy import OpenSearch
from opensearch_dsl import Search


def make_opensearch(index, filters, queries=None, exclusion_filters=None, range_filters=None,
                    prefix_filters=None, terms_filters=None, es_url='https://opensearch.lco.global'):
    """
    Make an OpenSearch query

    Parameters
    ----------
    index : str
        Name of index to search
    filters : list of dicts
        Each dict has a criterion for an OpenSearch "filter"
    queries : list of dicts
        Each dict has a "type" and "query" entry. The "query" entry is a dict that
        has a criterion for an OpenSearch "query"
    exclusion_filters : list of dicts
        Each dict has a criterion for an OpenSearch "exclude"
    range_filters : list of dicts
        Each dict has a criterion for an OpenSearch "range" filter
    prefix_filters : list of dicts
        Each dict has a criterion for an OpenSearch "prefix" filter
    terms_filters : list of dicts
        Each dict has a criterion for an OpenSearch "terms" filter
    es_url : str
        URL of the OpenSearch host

    Returns
    -------
    search : opensearch_dsl.Search
        The OpenSearch object
    """
    # Normalize optional arguments so the loops below always have a list to iterate.
    if queries is None:
        queries = []
    if exclusion_filters is None:
        exclusion_filters = []
    if range_filters is None:
        range_filters = []
    if terms_filters is None:
        terms_filters = []
    if prefix_filters is None:
        prefix_filters = []

    es = OpenSearch(es_url)
    s = Search(using=es, index=index)

    # Each criterion maps directly onto the corresponding OpenSearch DSL clause.
    for f in filters:
        s = s.filter('term', **f)
    for f in terms_filters:
        s = s.filter('terms', **f)
    for f in range_filters:
        s = s.filter('range', **f)
    for f in prefix_filters:
        s = s.filter('prefix', **f)
    for f in exclusion_filters:
        s = s.exclude('term', **f)
    for q in queries:
        s = s.query(q['type'], **q['query'])
    return s
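# Minimal usage sketch for make_opensearch. The index name, field names, and filter
# values below are illustrative assumptions, not values taken from the code above;
# only the default es_url comes from the function signature.
def example_make_opensearch():
    search = make_opensearch(
        index='fitsheaders',                              # hypothetical index name
        filters=[{'site': 'ogg'}],                        # term filter (hypothetical field)
        exclusion_filters=[{'rlevel': 0}],                # exclude matching documents
        range_filters=[{'dateobs': {'gte': '2024-01-01'}}],
        prefix_filters=[{'object': 'M1'}],
        queries=[{'type': 'match', 'query': {'proposal': 'calibrate'}}],
    )
    # The Search object is lazy; call .execute() or .scan() to actually run the query.
    return search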
def multiple_aggregate_search(es_connection, index_pattern, search_query, aggregation_type,
                              aggregation_field_one, aggregation_field_two, sort='@timestamp',
                              limit_to_fields=[], date_start='now-1d/d', date_end='now'):
    s = Search(using=es_connection, index=index_pattern, doc_type='_doc')
    s = s.query('query_string', query=search_query)
    if len(limit_to_fields) != 0:
        s = s.source(limit_to_fields)
    s = s.sort(sort)
    s = s.filter('range', **{'@timestamp': {'gte': date_start, 'lt': date_end}})
    # The calls above could be collapsed into a single chained expression based on your preference:
    # s = Search(using=es_connection, index='lab4.1-complete', doc_type='_doc').query('query_string', query='tags:internal_source').source(['source_ip']).sort('source_ip')

    # Build one terms bucket per field, each with a sub-aggregation named 'Count'.
    s.aggs.bucket(aggregation_field_one, 'terms', field=aggregation_field_one, size=999999).metric('Count', aggregation_type, field=aggregation_field_one)
    s.aggs.bucket(aggregation_field_two, 'terms', field=aggregation_field_two, size=999999).metric('Count', aggregation_type, field=aggregation_field_two)
    response = s.execute()

    aggregation_one = [x['key'] for x in response.aggregations[aggregation_field_one].buckets]
    aggregation_two = [x['key'] for x in response.aggregations[aggregation_field_two].buckets]
    # Pair the bucket keys of the two aggregations by position; this assumes both
    # aggregations return the same number of buckets in matching order.
    return {aggregation_one[i]: aggregation_two[i] for i in range(len(aggregation_one))}
    # Alternative return shape (unreachable here): a list of (key_one, key_two) tuples.
    # return list(zip([x['key'] for x in response.aggregations[aggregation_field_one].buckets],
    #                 [x['key'] for x in response.aggregations[aggregation_field_two].buckets]))
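# Hedged usage sketch for multiple_aggregate_search. The cluster URL, index pattern,
# and field names are illustrative assumptions; the query string reuses the example
# from the comment inside the function. The result maps each bucket key of the first
# field to the positionally matching key of the second field.
def example_multiple_aggregate_search():
    es = OpenSearch('https://opensearch.example.org')  # hypothetical cluster
    return multiple_aggregate_search(
        es_connection=es,
        index_pattern='logs-*',               # hypothetical index pattern
        search_query='tags:internal_source',  # Lucene query string
        aggregation_type='cardinality',
        aggregation_field_one='source_ip',
        aggregation_field_two='destination_ip',
        date_start='now-7d/d',
        date_end='now',
    )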
def build_search(es_connection, index, query, sort='@timestamp', limit_to_fields=[]):
    """Build a Search object from a Lucene query string without executing it.

    Args:
        es_connection: Connection to run the search against.
        index (str): Index pattern to search against.
        query (str): Lucene query to limit results.
        sort (str, optional): Sort filter. Defaults to '@timestamp'.
        limit_to_fields (list, optional): Limit which fields to return. Defaults to [].

    Returns:
        Search: The constructed, unexecuted search object.
    """
    search = Search(using=es_connection, index=index, doc_type='_doc')
    search = search.query('query_string', query=query)
    if len(limit_to_fields) != 0:
        search = search.source(limit_to_fields)
    search = search.sort(sort)
    return search
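# Hedged usage sketch for build_search. The connection URL, index pattern, query, and
# field list are illustrative assumptions; calling .execute() on the returned object
# runs the query.
def example_build_search():
    es = OpenSearch('https://opensearch.example.org')  # hypothetical cluster
    search = build_search(
        es_connection=es,
        index='logs-*',                  # hypothetical index pattern
        query='response_code:500',       # hypothetical Lucene query
        limit_to_fields=['host', 'message'],
    )
    return search.execute()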
def aggregate_search(es_connection, index_pattern, search_query, aggregation_type, aggregation_field,
                     sort='@timestamp', limit_to_fields=[], date_start='now-1d/d', date_end='now',
                     result_size=100, interval='auto'):
    s = Search(using=es_connection, index=index_pattern, doc_type='_doc')
    s = s.query('query_string', query=search_query)
    if len(limit_to_fields) != 0:
        s = s.source(limit_to_fields)
    s = s.sort(sort)
    # Pass date_start='ignore' to skip the time-range filter entirely.
    if date_start != 'ignore':
        s = s.filter('range', **{sort: {'gte': date_start, 'lt': date_end}})

    # Bucket on the requested field, then attach the requested sub-aggregation to that bucket.
    s.aggs.bucket(aggregation_field, 'terms', field=aggregation_field, size=result_size)
    if aggregation_type == 'date_histogram':
        s.aggs[aggregation_field].metric('Count', aggregation_type, field=aggregation_field, interval=interval)
    else:
        s.aggs[aggregation_field].metric('Count', aggregation_type, field=aggregation_field)
    response = s.execute()

    if aggregation_type in ['terms', 'auto_date_histogram', 'date_histogram']:
        # Bucketing aggregations: return a {key: doc_count} mapping.
        return_dict = {}
        for row in response.aggregations[aggregation_field].buckets:
            return_dict[row['key']] = row['doc_count']
        return return_dict
    else:
        # Other aggregation types: return the raw buckets.
        return [x for x in response.aggregations[aggregation_field].buckets]
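# Hedged usage sketch for aggregate_search with a terms aggregation. The cluster URL,
# index pattern, query, and field name are illustrative assumptions; the return value
# is the {bucket_key: doc_count} dict built above.
def example_aggregate_search():
    es = OpenSearch('https://opensearch.example.org')  # hypothetical cluster
    counts_by_host = aggregate_search(
        es_connection=es,
        index_pattern='logs-*',            # hypothetical index pattern
        search_query='response_code:500',  # hypothetical Lucene query
        aggregation_type='terms',
        aggregation_field='hostname',      # hypothetical keyword-mapped field
        date_start='now-1d/d',
        date_end='now',
        result_size=50,
    )
    return counts_by_host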