def make_opensearch(index,
                    filters,
                    queries=None,
                    exclusion_filters=None,
                    range_filters=None,
                    prefix_filters=None,
                    terms_filters=None,
                    es_url='https://opensearch.lco.global'):
    """
    Build an OpenSearch search object from lists of filter and query criteria

    The search is only assembled here; this function never executes it.

    Parameters
    ----------
    index : str
            Name of the index to search
    filters : list of dicts
              Each dict is unpacked as the keyword arguments of a "term" filter
    queries : list of dicts
              Each dict has a "type" and a "query" entry. The "query" entry is a dict that is unpacked as the
              keyword arguments of a query of the given "type"
    exclusion_filters : list of dicts
                        Each dict is unpacked as the keyword arguments of a "term" exclusion
    range_filters : list of dicts
                    Each dict is unpacked as the keyword arguments of a "range" filter
    prefix_filters : list of dicts
                     Each dict is unpacked as the keyword arguments of a "prefix" filter
    terms_filters : list of dicts
                    Each dict is unpacked as the keyword arguments of a "terms" filter
    es_url : str
             URL of the OpenSearch host

    Returns
    -------
    search : opensearch_dsl.Search
             The search object with every criterion applied
    """
    client = OpenSearch(es_url)
    search = Search(using=client, index=index)

    # Filters are applied group by group in this fixed order; an omitted
    # (None) group contributes nothing.
    filter_groups = (
        ('term', filters),
        ('terms', [] if terms_filters is None else terms_filters),
        ('range', [] if range_filters is None else range_filters),
        ('prefix', [] if prefix_filters is None else prefix_filters),
    )
    for filter_kind, criteria in filter_groups:
        for criterion in criteria:
            search = search.filter(filter_kind, **criterion)

    for criterion in ([] if exclusion_filters is None else exclusion_filters):
        search = search.exclude('term', **criterion)

    for query_spec in ([] if queries is None else queries):
        search = search.query(query_spec['type'], **query_spec['query'])

    return search
Example #2
0
def get_search_by_entities_query(
        entities,
        term=None,
        filter_data=None,
        composite_field_mapping=None,
        permission_filters=None,
        ordering=None,
        fields_to_include=None,
        fields_to_exclude=None,
):
    """
    Builds a filtered search for the given term across the given entities.

    One term query is built per entity (using that entity's SEARCH_FIELDS) and
    all of them are combined in a bool "must" clause. The filter data is split
    into plain and range filters, which are applied as a bool "must" filter,
    after the optional permission filter. Sorting and source filtering are
    delegated to the corresponding helpers, and the result of the
    source-filtering helper is returned.
    """
    if not filter_data:
        filter_data = {}

    # Only an empty-string term skips the term queries; any other value
    # (including the default None) is handed to _build_term_query.
    term_queries = []
    if term != '':
        term_queries = [
            _build_term_query(term, fields=entity.SEARCH_FIELDS)
            for entity in entities
        ]

    filters, ranges = _split_range_fields(filter_data)

    # A document must match every filter in this list (logical AND).
    must_filter = _build_must_queries(filters, ranges, composite_field_mapping)

    read_aliases = [entity.get_read_alias() for entity in entities]
    search = Search(index=read_aliases)
    search = search.query(Bool(must=term_queries))
    search = search.extra(track_total_hits=True)

    permission_query = _build_entity_permission_query(permission_filters)
    if permission_query:
        search = search.filter(permission_query)

    search = search.filter(Bool(must=must_filter))
    search = _apply_sorting_to_query(search, ordering)
    return _apply_source_filtering_to_query(
        search,
        fields_to_include=fields_to_include,
        fields_to_exclude=fields_to_exclude,
    )
Example #3
0
def get_basic_search_query(
        entity,
        term,
        permission_filters_by_entity=None,
        offset=0,
        limit=100,
        fields_to_exclude=None,
        fuzzy=False,
):
    """
    Builds a basic search for the given term across all global search apps.

    The term is searched in the combined SEARCH_FIELDS of every global search
    app. A post filter on `_document_type` narrows the hits to the given
    entity, while a `count_by_type` terms aggregation on `_document_type` is
    attached so that counts for the other entities are available as well.

    :param permission_filters_by_entity: List of pairs of entities and corresponding permission
                                         filters. Only entities in this list are included in the
                                         results, and those entities are also filtered using
                                         the corresponding permission filters.
    """
    limit = _clip_limit(offset, limit)

    search_models = [
        app.search_model
        for app in get_global_search_apps_as_mapping().values()
    ]
    indices = [model.get_read_alias() for model in search_models]

    # De-duplicate and sort the fields so that the generated query is
    # deterministic for the same inputs.
    fields = sorted({
        field
        for model in search_models
        for field in model.SEARCH_FIELDS
    })

    term_query = _build_term_query(term, fields=fields, fuzzy=fuzzy)
    search = Search(index=indices).query(term_query)

    permission_query = _build_global_permission_query(permission_filters_by_entity)
    if permission_query:
        search = search.filter(permission_query)

    search = search.post_filter(
        Bool(should=Term(_document_type=entity.get_app_name())),
    )
    search = search.sort('_score', 'id')
    search = search.source(excludes=fields_to_exclude)
    search = search.extra(track_total_hits=True)

    # Registers the aggregation on the search object in place.
    search.aggs.bucket('count_by_type', 'terms', field='_document_type')

    end = offset + limit
    return search[offset:end]
Example #4
0
def multiple_aggregate_search(
        es_connection,
        index_pattern,
        search_query,
        aggregation_type,
        aggregation_field_one,
        aggregation_field_two,
        sort='@timestamp',
        limit_to_fields=None,
        date_start='now-1d/d',
        date_end='now',
):
    """
    Run a query-string search with two terms aggregations and pair their keys.

    :param es_connection: Connection handed to ``Search(using=...)``.
    :param index_pattern: Index (or index pattern) to search.
    :param search_query: Query text for the ``query_string`` query.
    :param aggregation_type: Aggregation type of the ``Count`` sub-aggregation
                             attached to each terms bucket.
    :param aggregation_field_one: Field of the first terms aggregation; its
                                  bucket keys become the keys of the result.
    :param aggregation_field_two: Field of the second terms aggregation; its
                                  bucket keys become the values of the result.
    :param sort: Sort specification passed to ``Search.sort``.
    :param limit_to_fields: Optional list of source fields to return; ``None``
                            or an empty list leaves the source unrestricted.
    :param date_start: Inclusive lower bound (``gte``) on ``@timestamp``.
    :param date_end: Exclusive upper bound (``lt``) on ``@timestamp``.
    :return: Dict mapping the i-th bucket key of the first aggregation to the
             i-th bucket key of the second one. Pairing is purely positional
             and stops at the shorter of the two bucket lists.
    """
    s = Search(using=es_connection, index=index_pattern, doc_type='_doc')
    s = s.query('query_string', query=search_query)
    if limit_to_fields:
        s = s.source(limit_to_fields)
    s = s.sort(sort)
    # The time window is always applied to '@timestamp', independently of `sort`.
    s = s.filter('range', **{'@timestamp': {'gte': date_start, 'lt': date_end}})

    # aggs.bucket() registers the aggregation on `s` in place.
    for field in (aggregation_field_one, aggregation_field_two):
        s.aggs.bucket(field, 'terms', field=field, size=999999).metric('Count', aggregation_type, field=field)

    response = s.execute()
    keys_one = [bucket['key'] for bucket in response.aggregations[aggregation_field_one].buckets]
    keys_two = [bucket['key'] for bucket in response.aggregations[aggregation_field_two].buckets]
    # zip() avoids the IndexError that index-based pairing raised whenever the
    # second aggregation returned fewer buckets than the first.
    return dict(zip(keys_one, keys_two))
Example #5
0
def aggregate_search(
        es_connection,
        index_pattern,
        search_query,
        aggregation_type,
        aggregation_field,
        sort='@timestamp',
        limit_to_fields=None,
        date_start='now-1d/d',
        date_end='now',
        result_size=100,
        interval='auto',
):
    """
    Run a query-string search with one terms aggregation and return its buckets.

    :param es_connection: Connection handed to ``Search(using=...)``.
    :param index_pattern: Index (or index pattern) to search.
    :param search_query: Query text for the ``query_string`` query.
    :param aggregation_type: Aggregation type of the ``Count`` sub-aggregation
                             attached to the terms bucket; it also selects the
                             shape of the return value (see below).
    :param aggregation_field: Field to aggregate on.
    :param sort: Sort specification passed to ``Search.sort``. The same field
                 is used for the date range filter.
    :param limit_to_fields: Optional list of source fields to return; ``None``
                            or an empty list leaves the source unrestricted.
    :param date_start: Inclusive lower bound (``gte``) of the range filter, or
                       the literal ``'ignore'`` to skip the range filter.
    :param date_end: Exclusive upper bound (``lt``) of the range filter.
    :param result_size: ``size`` of the terms aggregation.
    :param interval: ``interval`` of the sub-aggregation; only passed when
                     ``aggregation_type`` is ``'date_histogram'``.
    :return: For ``'terms'``, ``'auto_date_histogram'`` and
             ``'date_histogram'``: a dict mapping each bucket key to its
             ``doc_count``. For any other type: the list of buckets.
    """
    s = Search(using=es_connection, index=index_pattern, doc_type='_doc')
    s = s.query('query_string', query=search_query)
    if limit_to_fields:
        s = s.source(limit_to_fields)
    s = s.sort(sort)
    if date_start != 'ignore':
        # A leading '-' only marks descending order for Search.sort(); it is
        # not part of the field name, so drop it for the range filter.
        range_field = sort[1:] if isinstance(sort, str) and sort.startswith('-') else sort
        s = s.filter('range', **{range_field: {'gte': date_start, 'lt': date_end}})

    metric_kwargs = {'field': aggregation_field}
    if aggregation_type == 'date_histogram':
        metric_kwargs['interval'] = interval
    # aggs.bucket() registers the aggregation on `s` in place and returns it,
    # so the sub-aggregation can be chained directly.
    s.aggs.bucket(aggregation_field, 'terms', field=aggregation_field, size=result_size).metric('Count', aggregation_type, **metric_kwargs)

    response = s.execute()
    buckets = response.aggregations[aggregation_field].buckets
    if aggregation_type in ("terms", "auto_date_histogram", "date_histogram"):
        return {bucket['key']: bucket['doc_count'] for bucket in buckets}
    return list(buckets)