def make_opensearch(index,
                    filters,
                    queries=None,
                    exclusion_filters=None,
                    range_filters=None,
                    prefix_filters=None,
                    terms_filters=None,
                    es_url='https://opensearch.lco.global'):
    """
    Assemble (but do not execute) a search against an OpenSearch index

    Criteria are applied in a fixed order: term filters, terms filters,
    range filters, prefix filters, term exclusions and finally queries.

    Parameters
    ----------
    index : str
            Name of the index to search
    filters : list of dicts
              Each dict is expanded as keyword arguments to a "term" filter
    queries : list of dicts, optional
              Each dict has a "type" entry (the query type) and a "query" entry; the "query" entry is
              itself a dict that is expanded as keyword arguments to that query type
    exclusion_filters : list of dicts, optional
                        Each dict is expanded as keyword arguments to a "term" exclusion
    range_filters : list of dicts, optional
                    Each dict is expanded as keyword arguments to a "range" filter
    prefix_filters : list of dicts, optional
                     Each dict is expanded as keyword arguments to a "prefix" filter
    terms_filters : list of dicts, optional
                    Each dict is expanded as keyword arguments to a "terms" filter
    es_url : str
             URL of the OpenSearch host

    Returns
    -------
    search : Search
             The search object with every filter, exclusion and query attached
    """
    def _or_empty(criteria):
        # Only None means "not supplied"; any other value is iterated as given.
        return [] if criteria is None else criteria

    client = OpenSearch(es_url)
    search = Search(using=client, index=index)

    # (filter kind, criteria) pairs, listed in the order they must be applied.
    filter_stages = (
        ('term', filters),
        ('terms', _or_empty(terms_filters)),
        ('range', _or_empty(range_filters)),
        ('prefix', _or_empty(prefix_filters)),
    )
    for kind, criteria in filter_stages:
        for criterion in criteria:
            search = search.filter(kind, **criterion)

    for criterion in _or_empty(exclusion_filters):
        search = search.exclude('term', **criterion)

    for spec in _or_empty(queries):
        search = search.query(spec['type'], **spec['query'])

    return search
# Example #2
0
def multiple_aggregate_search(es_connection, index_pattern, search_query, aggregation_type, aggregation_field_one, aggregation_field_two, sort='@timestamp', limit_to_fields=None, date_start='now-1d/d', date_end='now'):
    """Run a query_string search with two terms aggregations and pair up their bucket keys.

    Args:
        es_connection: Connection object handed to ``Search(using=...)``.
        index_pattern (str): Index or index pattern to search against.
        search_query (str): Query text passed to a ``query_string`` query.
        aggregation_type (str): Aggregation type used for the ``Count``
            metric nested under each terms bucket.
        aggregation_field_one (str): Field for the first terms aggregation;
            also used as that aggregation's name.
        aggregation_field_two (str): Field for the second terms aggregation;
            also used as that aggregation's name.
        sort (str, optional): Sort specification. Defaults to '@timestamp'.
        limit_to_fields (list, optional): Fields to restrict ``_source`` to.
            Nothing is restricted when empty or None. Defaults to None.
        date_start (str, optional): Inclusive lower bound (``gte``) of the
            '@timestamp' range filter. Defaults to 'now-1d/d'.
        date_end (str, optional): Exclusive upper bound (``lt``) of the
            '@timestamp' range filter. Defaults to 'now'.

    Returns:
        dict: Maps the key of the i-th bucket of the first aggregation to the
        key of the i-th bucket of the second aggregation. Pairing is purely
        positional; when the two aggregations return a different number of
        buckets the surplus buckets are dropped.
    """
    s = Search(using=es_connection, index=index_pattern, doc_type='_doc')
    s = s.query('query_string', query=search_query)
    if limit_to_fields:
        s = s.source(limit_to_fields)
    s = s.sort(sort)
    s = s.filter('range', **{'@timestamp': {'gte': date_start, 'lt': date_end}})

    # Each field gets its own terms aggregation, named after the field, with
    # a nested 'Count' metric of the requested aggregation type.
    for field in (aggregation_field_one, aggregation_field_two):
        s.aggs.bucket(field, 'terms', field=field, size=999999).metric('Count', aggregation_type, field=field)

    response = s.execute()
    keys_one = [bucket['key'] for bucket in response.aggregations[aggregation_field_one].buckets]
    keys_two = [bucket['key'] for bucket in response.aggregations[aggregation_field_two].buckets]

    # zip() stops at the shorter list, so a second aggregation with fewer
    # buckets than the first no longer raises IndexError.
    return dict(zip(keys_one, keys_two))
# Example #3
0
def build_search(es_connection, index, query, sort='@timestamp', limit_to_fields=[]):
    """Build a sorted ``query_string`` search without executing it.

    Args:
        es_connection: Connection object handed to ``Search(using=...)``.
        index (str): Index pattern to search against.
        query (str): Query text passed to a ``query_string`` query.
        sort (str, optional): Sort specification. Defaults to '@timestamp'.
        limit_to_fields (list, optional): Fields to restrict the returned
            source to; no restriction is applied when empty. Defaults to [].

    Returns:
        The configured search object, ready for the caller to execute.
    """
    base = Search(using=es_connection, index=index, doc_type='_doc')
    matched = base.query('query_string', query=query)
    # Only narrow the returned source when the caller listed some fields.
    wants_subset = len(limit_to_fields) > 0
    projected = matched.source(limit_to_fields) if wants_subset else matched
    return projected.sort(sort)
# Example #4
0
def aggregate_search(es_connection, index_pattern, search_query, aggregation_type, aggregation_field, sort='@timestamp', limit_to_fields=None, date_start='now-1d/d', date_end='now', result_size=100, interval='auto'):
    """Run a query_string search with one terms aggregation and return its buckets.

    Args:
        es_connection: Connection object handed to ``Search(using=...)``.
        index_pattern (str): Index or index pattern to search against.
        search_query (str): Query text passed to a ``query_string`` query.
        aggregation_type (str): Aggregation type used for the ``Count``
            metric nested under the terms bucket; also selects the return
            shape (see Returns).
        aggregation_field (str): Field to aggregate on; also used as the
            aggregation's name.
        sort (str, optional): Sort specification. The same field is used for
            the date range filter. Defaults to '@timestamp'.
        limit_to_fields (list, optional): Fields to restrict ``_source`` to.
            Nothing is restricted when empty or None. Defaults to None.
        date_start (str, optional): Inclusive lower bound (``gte``) of the
            range filter, or the literal 'ignore' to skip the range filter
            entirely. Defaults to 'now-1d/d'.
        date_end (str, optional): Exclusive upper bound (``lt``) of the range
            filter. Defaults to 'now'.
        result_size (int, optional): ``size`` of the terms aggregation.
            Defaults to 100.
        interval (str, optional): ``interval`` passed to the metric only when
            ``aggregation_type`` is 'date_histogram'. Defaults to 'auto'.

    Returns:
        dict | list: For aggregation types 'terms', 'auto_date_histogram' and
        'date_histogram', a dict mapping each bucket key to its doc_count.
        For any other type, the list of raw bucket objects.
    """
    s = Search(using=es_connection, index=index_pattern, doc_type='_doc')
    s = s.query('query_string', query=search_query)
    if limit_to_fields:
        s = s.source(limit_to_fields)
    s = s.sort(sort)

    if date_start != 'ignore':
        # A leading '-' on a sort string requests descending order and is not
        # part of the field name, so drop it before using the field in the
        # range filter. NOTE(review): assumes the DSL's '-field' sort
        # convention - confirm against the Search class in use.
        range_field = sort[1:] if isinstance(sort, str) and sort.startswith('-') else sort
        s = s.filter('range', **{range_field: {'gte': date_start, 'lt': date_end}})

    s.aggs.bucket(aggregation_field, 'terms', field=aggregation_field, size=result_size)
    metric_kwargs = {'field': aggregation_field}
    if aggregation_type == 'date_histogram':
        metric_kwargs['interval'] = interval
    s.aggs[aggregation_field].metric('Count', aggregation_type, **metric_kwargs)

    response = s.execute()
    buckets = response.aggregations[aggregation_field].buckets
    if aggregation_type in ("terms", "auto_date_histogram", "date_histogram"):
        return {bucket['key']: bucket['doc_count'] for bucket in buckets}
    return list(buckets)