def make_opensearch(index, filters, queries=None, exclusion_filters=None,
                    range_filters=None, prefix_filters=None, terms_filters=None,
                    es_url='https://opensearch.lco.global'):
    """
    Make an OpenSearch query

    Parameters
    ----------
    index : str
        Name of index to search
    filters : list of dicts
        Each dict has a criterion for an OpenSearch "term" filter
    queries : list of dicts
        Each dict has a "type" and "query" entry. The 'query' entry is a dict
        that has a criterion for an OpenSearch "query"
    exclusion_filters : list of dicts
        Each dict has a criterion for an OpenSearch "term" exclusion
    range_filters : list of dicts
        Each dict has a criterion for an OpenSearch "range" filter
    prefix_filters : list of dicts
        Each dict has a criterion for an OpenSearch "prefix" filter
    terms_filters : list of dicts
        Each dict has a criterion for an OpenSearch "terms" filter
    es_url : str
        URL of the OpenSearch host

    Returns
    -------
    search : opensearch_dsl.Search
        The OpenSearch object
    """
    # Replace None defaults with fresh lists (avoids the shared
    # mutable-default pitfall while keeping the call signature simple).
    if queries is None:
        queries = []
    if exclusion_filters is None:
        exclusion_filters = []
    if range_filters is None:
        range_filters = []
    if terms_filters is None:
        terms_filters = []
    if prefix_filters is None:
        prefix_filters = []

    es = OpenSearch(es_url)
    s = Search(using=es, index=index)

    # Each criterion group maps onto one opensearch-dsl filter/exclude/query
    # call; Search objects are immutable, so rebind on every call.
    for f in filters:
        s = s.filter('term', **f)
    for f in terms_filters:
        s = s.filter('terms', **f)
    for f in range_filters:
        s = s.filter('range', **f)
    for f in prefix_filters:
        s = s.filter('prefix', **f)
    for f in exclusion_filters:
        s = s.exclude('term', **f)
    for q in queries:
        s = s.query(q['type'], **q['query'])
    return s
def get_search_by_entities_query(
    entities,
    term=None,
    filter_data=None,
    composite_field_mapping=None,
    permission_filters=None,
    ordering=None,
    fields_to_include=None,
    fields_to_exclude=None,
):
    """
    Build a filtered search for the given term across the given entities.
    """
    filter_data = filter_data or {}

    # One term query per entity, using that entity's own searchable fields.
    # An empty-string term is treated as "no term query" (None still queries).
    term_queries = []
    if term != '':
        term_queries = [
            _build_term_query(term, fields=entity.SEARCH_FIELDS)
            for entity in entities
        ]

    filters, ranges = _split_range_fields(filter_data)

    # A document must satisfy every clause in this list (logical AND).
    must_clauses = _build_must_queries(filters, ranges, composite_field_mapping)

    read_aliases = [entity.get_read_alias() for entity in entities]
    search = (
        Search(index=read_aliases)
        .query(Bool(must=term_queries))
        .extra(track_total_hits=True)
    )

    permission_query = _build_entity_permission_query(permission_filters)
    if permission_query:
        search = search.filter(permission_query)

    search = search.filter(Bool(must=must_clauses))
    search = _apply_sorting_to_query(search, ordering)
    return _apply_source_filtering_to_query(
        search,
        fields_to_include=fields_to_include,
        fields_to_exclude=fields_to_exclude,
    )
def get_basic_search_query(
    entity,
    term,
    permission_filters_by_entity=None,
    offset=0,
    limit=100,
    fields_to_exclude=None,
    fuzzy=False,
):
    """
    Performs basic search for the given term in the given entity using the
    SEARCH_FIELDS. It also returns number of results in other entities.

    :param permission_filters_by_entity: List of pairs of entities and
        corresponding permission filters. Only entities in this list are
        included in the results, and those entities are also filtered using
        the corresponding permission filters.
    """
    limit = _clip_limit(offset, limit)

    apps = tuple(get_global_search_apps_as_mapping().values())
    read_aliases = [app.search_model.get_read_alias() for app in apps]

    # Collect the searchable fields of every app, de-duplicated, then sort
    # them so the generated query is deterministic for identical inputs.
    all_fields = sorted(
        set(chain.from_iterable(app.search_model.SEARCH_FIELDS for app in apps)),
    )

    term_query = _build_term_query(term, fields=all_fields, fuzzy=fuzzy)
    search = Search(index=read_aliases).query(term_query)

    permission_query = _build_global_permission_query(permission_filters_by_entity)
    if permission_query:
        search = search.filter(permission_query)

    # Post-filter (applied after aggregation) restricts hits to the requested
    # entity while the per-type counts still cover every entity.
    doc_type_filter = Bool(should=Term(_document_type=entity.get_app_name()))
    search = search.post_filter(doc_type_filter)
    search = search.sort('_score', 'id')
    search = search.source(excludes=fields_to_exclude)
    search = search.extra(track_total_hits=True)

    search.aggs.bucket(
        'count_by_type',
        'terms',
        field='_document_type',
    )

    return search[offset:offset + limit]
def multiple_aggregate_search(es_connection, index_pattern, search_query,
                              aggregation_type, aggregation_field_one,
                              aggregation_field_two, sort='@timestamp',
                              limit_to_fields=None, date_start='now-1d/d',
                              date_end='now'):
    """
    Run a query with two terms aggregations and pair their bucket keys.

    :param es_connection: Elasticsearch/OpenSearch client used by the Search
    :param index_pattern: index (pattern) to query
    :param search_query: query_string-syntax query
    :param aggregation_type: metric type applied inside each bucket
    :param aggregation_field_one: field for the first terms aggregation
    :param aggregation_field_two: field for the second terms aggregation
    :param sort: field to sort hits by (default '@timestamp')
    :param limit_to_fields: optional list of source fields to return
    :param date_start: start of the @timestamp range filter
    :param date_end: end of the @timestamp range filter
    :return: dict mapping first-aggregation bucket keys to
        second-aggregation bucket keys, paired positionally
    """
    # Fix: 'limit_to_fields=[]' was a shared mutable default argument.
    if limit_to_fields is None:
        limit_to_fields = []

    s = Search(using=es_connection, index=index_pattern, doc_type='_doc')
    s = s.query('query_string', query=search_query)
    if limit_to_fields:
        s = s.source(limit_to_fields)
    s = s.sort(sort)
    s = s.filter('range', **{'@timestamp': {'gte': date_start, 'lt': date_end}})

    # aggs.bucket mutates the Search in place, so no rebinding here.
    s.aggs.bucket(aggregation_field_one, 'terms',
                  field=aggregation_field_one, size=999999) \
        .metric('Count', aggregation_type, field=aggregation_field_one)
    s.aggs.bucket(aggregation_field_two, 'terms',
                  field=aggregation_field_two, size=999999) \
        .metric('Count', aggregation_type, field=aggregation_field_two)

    response = s.execute()

    keys_one = [b['key'] for b in response.aggregations[aggregation_field_one].buckets]
    keys_two = [b['key'] for b in response.aggregations[aggregation_field_two].buckets]

    # NOTE(review): the two aggregations are independent, so pairing their
    # bucket keys by position assumes equal length and matching order —
    # confirm this is the intended semantics. zip() truncates to the shorter
    # list instead of raising IndexError as the old index loop did.
    # (An unreachable second 'return list(zip(...))' after this return was
    # removed as dead code.)
    return dict(zip(keys_one, keys_two))
def aggregate_search(es_connection, index_pattern, search_query,
                     aggregation_type, aggregation_field, sort='@timestamp',
                     limit_to_fields=None, date_start='now-1d/d',
                     date_end='now', result_size=100, interval='auto'):
    """
    Run a query with a single terms aggregation and summarize its buckets.

    :param es_connection: Elasticsearch/OpenSearch client used by the Search
    :param index_pattern: index (pattern) to query
    :param search_query: query_string-syntax query
    :param aggregation_type: metric type applied inside each bucket
        (e.g. 'terms', 'date_histogram')
    :param aggregation_field: field to aggregate on
    :param sort: field to sort hits by; also used as the range-filter field
    :param limit_to_fields: optional list of source fields to return
    :param date_start: start of the range filter, or the sentinel 'ignore'
        to skip time-range filtering entirely
    :param date_end: end of the range filter
    :param result_size: maximum number of buckets to return
    :param interval: histogram interval, used only for 'date_histogram'
    :return: dict of bucket key -> doc_count for terms/histogram types,
        otherwise the raw list of buckets
    """
    # Fix: 'limit_to_fields=[]' was a shared mutable default argument.
    if limit_to_fields is None:
        limit_to_fields = []

    s = Search(using=es_connection, index=index_pattern, doc_type='_doc')
    s = s.query('query_string', query=search_query)
    if limit_to_fields:
        s = s.source(limit_to_fields)
    s = s.sort(sort)
    if date_start != 'ignore':
        s = s.filter('range', **{sort: {'gte': date_start, 'lt': date_end}})

    # aggs.bucket mutates the Search in place, so no rebinding here.
    s.aggs.bucket(aggregation_field, 'terms',
                  field=aggregation_field, size=result_size)
    if aggregation_type == 'date_histogram':
        # Only date_histogram metrics accept an interval argument.
        s.aggs[aggregation_field].metric('Count', aggregation_type,
                                         field=aggregation_field,
                                         interval=interval)
    else:
        s.aggs[aggregation_field].metric('Count', aggregation_type,
                                         field=aggregation_field)

    response = s.execute()
    buckets = response.aggregations[aggregation_field].buckets

    if aggregation_type in ('terms', 'auto_date_histogram', 'date_histogram'):
        # Map each bucket key to the number of matching documents.
        return {row['key']: row['doc_count'] for row in buckets}
    return list(buckets)