def session_times():
    """Return the logged scenario sessions, most recent session first.

    Searches for the scenario start/stop marker messages and pairs them up.
    Each session is a dict keyed by event label, i.e.
    ``{'start': <timestamp>, 'stop': <timestamp>}``.
    """
    # Example of a logged event:
    # {"@fields": {}, "@timestamp": "2015-02-23T17:03:41.738412Z",
    #  "@source_host": "newair.brainbot.com",
    #  "@message": "scenario.p2p_connect.started"}
    start_message = 'scenario.p2p_connect.starting.clients.sequentially'
    stop_message = 'scenario.p2p_connect.stopping.clients'

    s = Search(client)
    s = s.filter(
        'bool',
        should=[
            F('term', message=start_message),
            F('term', message=stop_message),
        ])
    s = s.fields(['message', '@timestamp'])
    s = s[0:100000]
    s = s.sort('-@timestamp')  # descending: the latest events come first
    response = s.execute()

    # Youngest to oldest; the most recent event should be a stop message.
    events = [
        ('start' if hit['message'][0] == start_message else 'stop',
         hit['@timestamp'][0])
        for hit in response
    ]
    assert not events or events[0][0] == 'stop'

    sessions = []
    while len(events) >= 2:
        # pop() takes from the tail, so these are the two oldest events
        # still left. The dict is keyed by the event label, not by the
        # order in which the pair is popped.
        older = events.pop()
        newer = events.pop()
        sessions.append(dict([newer, older]))
    return list(reversed(sessions))
def filter_queryset(self, request, qs, view):
    """Restrict ``qs`` to reviewed documents that are publicly visible.

    A document is kept when its ``status`` is one of
    ``amo.REVIEWED_STATUSES`` and it is neither deleted, unlisted nor
    disabled.

    ``request`` and ``view`` are part of the filter-backend signature and
    are not used here.
    """
    return qs.filter(
        Bool(
            # `terms` (plural) matches any one of several values; `term`
            # expects a single scalar.
            # NOTE(review): assumes amo.REVIEWED_STATUSES is a collection
            # of statuses, as its name suggests - confirm.
            must=[F('terms', status=amo.REVIEWED_STATUSES)],
            must_not=[
                F('term', is_deleted=True),
                F('term', is_listed=False),
                F('term', is_disabled=True),
            ]))
def get_es_filter(self):
    """Return the two range filters describing application compatibility.

    The first filter requires the stored minimum version to be at most
    ``low``; the second requires the stored maximum version to be at least
    ``high``. ``app_id``, ``low`` and ``high`` come from
    ``self.get_values()``.
    """
    app_id, low, high = self.get_values()
    min_field = 'current_version.compatible_apps.%d.min' % app_id
    max_field = 'current_version.compatible_apps.%d.max' % app_id
    lower_bound = F('range', **{min_field: {'lte': low}})
    upper_bound = F('range', **{max_field: {'gte': high}})
    return [lower_bound, upper_bound]
def test_complex_example():
    """A search combining queries, filters, aggs, highlighting and script
    fields serializes to the expected request body."""
    s = search.Search()
    s = s.query('match', title='python')
    s = s.query(~Q('match', title='ruby'))
    s = s.filter(
        F('term', category='meetup') | F('term', category='conference'))
    s = s.post_filter('terms', tags=['prague', 'czech'])
    s = s.script_fields(more_attendees="doc['attendees'].value + 42")

    per_country = s.aggs.bucket('per_country', 'terms', field='country')
    per_country.metric('avg_attendees', 'avg', field='attendees')

    s.query.minimum_should_match = 2

    s = s.highlight_options(order='score')
    s = s.highlight('title', 'body', fragment_size=50)

    expected_filter = {
        'bool': {
            'should': [
                {'term': {'category': 'meetup'}},
                {'term': {'category': 'conference'}},
            ],
        },
    }
    expected_query = {
        'bool': {
            'must': [{'match': {'title': 'python'}}],
            'must_not': [{'match': {'title': 'ruby'}}],
            'minimum_should_match': 2,
        },
    }
    expected = {
        'query': {
            'filtered': {
                'filter': expected_filter,
                'query': expected_query,
            },
        },
        'post_filter': {
            'terms': {'tags': ['prague', 'czech']},
        },
        'aggs': {
            'per_country': {
                'terms': {'field': 'country'},
                'aggs': {
                    'avg_attendees': {'avg': {'field': 'attendees'}},
                },
            },
        },
        "highlight": {
            'order': 'score',
            'fields': {
                'title': {'fragment_size': 50},
                'body': {'fragment_size': 50},
            },
        },
        'script_fields': {
            'more_attendees': {'script': "doc['attendees'].value + 42"},
        },
    }
    assert expected == s.to_dict()
def filter_queryset(self, request, qs, view):
    """Keep only reviewed documents that have a current version.

    Documents must have a ``status`` in ``amo.REVIEWED_STATUSES`` and a
    ``current_version`` field, and must be neither deleted nor disabled.
    """
    required = [
        F('terms', status=amo.REVIEWED_STATUSES),
        F('exists', field='current_version'),
    ]
    excluded = [
        F('term', is_deleted=True),
        F('term', is_disabled=True),
    ]
    return qs.filter(Bool(must=required, must_not=excluded))
def filter_queryset(self, request, queryset, view):
    """Keep only public, enabled documents that are not deleted."""
    required = [
        F('term', status=mkt.STATUS_PUBLIC),
        F('term', is_disabled=False),
    ]
    # Note: only Extensions have is_deleted; for Webapps the status is
    # changed on deletion instead. Hence the must_not clause: it holds
    # even when the field does not exist on the document.
    excluded = [F('term', is_deleted=True)]
    return queryset.filter(Bool(must=required, must_not=excluded))
def filter_queryset(self, request, queryset, view):
    """Filter on the ``is_homescreen`` GET parameter, when it is given.

    No parameter leaves the queryset untouched; the literal ``false``
    excludes homescreens; any other value keeps only homescreens.
    """
    is_homescreen = request.GET.get('is_homescreen')
    if is_homescreen is None:
        return queryset

    homescreen_term = [F('term', is_homescreen=True)]
    if is_homescreen == u'false':
        clause = Bool(must_not=homescreen_term)
    else:
        clause = Bool(must=homescreen_term)
    return queryset.filter(clause)
def get_es_filter(self):
    """Return the two range filters bounding the supported app versions.

    The first filter requires the stored minimum to be at most ``low``; the
    second requires the stored maximum to be at least ``high``. ``app_id``,
    ``low`` and ``high`` come from ``self.get_values()``.
    """
    app_id, low, high = self.get_values()
    min_field = 'appversion.%d.min' % app_id
    max_field = 'appversion.%d.max' % app_id
    lower_bound = F('range', **{min_field: {'lte': low}})
    upper_bound = F('range', **{max_field: {'gte': high}})
    return [lower_bound, upper_bound]
def filter_queryset(self, request, queryset, view):
    """Apply the parent form filters, then the ``is_tarako`` special case."""
    queryset = super(ReviewerSearchFormFilter, self).filter_queryset(
        request, queryset, view)

    # `is_tarako` is not a document field: it is translated into a filter
    # on the `tarako` tag (required when true, negated when false).
    is_tarako = self.form_data.get('is_tarako')
    if is_tarako is None:
        return queryset

    tag_filter = F('term', tags='tarako')
    if not is_tarako:
        tag_filter = ~tag_filter
    return queryset.filter(Bool(must=[tag_filter]))
def test_connections(clients):
    """Check that clients are connected, and that none connects to itself."""
    len_clients = len(clients)
    # Expect every client to see all others, capped at three peers.
    min_peers = min(len_clients, 3)
    assert_connected(
        minconnected=len_clients, minpeers=min_peers, offset=offset)

    guids = [nodeid_tool.topub(ext_id.encode('utf-8')) for ext_id in clients]
    for guid in guids:
        # Look for connection events where the remote end is the node itself.
        s = Search(client)
        s = s.filter('exists', field='json_message.p2p.connected.ts')
        s = s.filter(F('term', guid=guid))
        s = s.filter(F('term', remote_id=guid))
        response = s.execute()
        assert response.hits.total == 0, 'a client is connected to itself'
    print('PASS: no client is connected to itself')
def filter_queryset(self, request, queryset, view):
    """Filter ``queryset`` using the values of the view's search form.

    The form is built from ``request.GET``. Its cleaned data is stored on
    ``self.form_data``, and form keys are translated to index field names
    through ``self.FORM_TO_FIELD_MAP``. Each field listed in
    ``self.VALID_FILTERS`` that has a non-None value becomes a ``must``
    clause.

    Raises whatever ``form_errors(form)`` returns when the form is invalid.
    """
    form = view.form_class(request.GET)
    if not form.is_valid():
        raise form_errors(form)

    self.form_data = form.cleaned_data

    data = {}
    for key, value in self.form_data.items():
        data[self.FORM_TO_FIELD_MAP.get(key, key)] = value

    # Must filters.
    must = []
    for field in self.VALID_FILTERS:
        value = data.get(field)
        if value is None:
            continue
        # A list of values needs a `terms` filter; a scalar needs `term`.
        # isinstance() also covers list subclasses, unlike type() == list.
        filter_type = 'terms' if isinstance(value, list) else 'term'
        must.append(F(filter_type, **{field: value}))

    if must:
        return queryset.filter(Bool(must=must))
    return queryset
def filter_queryset(self, request, qs, view):
    """Turn the ``q`` GET parameter into a scored full-text query.

    Returns ``qs`` unchanged when no search text was given. Otherwise the
    primary and secondary "should" rules are wrapped in a
    ``function_score`` query.
    """
    search_query = request.GET.get('q', '').lower()
    if not search_query:
        return qs

    analyzer = get_locale_analyzer(translation.get_language())

    # The query is made of a number of should clauses; the ones carrying
    # the higher boost are called "primary" for convenience.
    primary_should = self.primary_should_rules(search_query, analyzer)
    secondary_should = self.secondary_should_rules(search_query, analyzer)

    # Scoring always depends on the "boost" field defined in the mapping
    # (used to rank public addons above the rest) ...
    functions = [query.SF('field_value_factor', field='boost')]
    # ... and, when the waffle switch is on, on whether the addon is a
    # webextension.
    if waffle.switch_is_active('boost-webextensions-in-search'):
        webextension_filter = F(
            'term', **{'current_version.files.is_webextension': True})
        functions.append(query.SF({
            'weight': WEBEXTENSIONS_WEIGHT,
            'filter': webextension_filter,
        }))

    # Assemble everything and return the search "queryset".
    combined = query.Bool(should=primary_should + secondary_should)
    return qs.query('function_score', query=combined, functions=functions)
def filter_queryset(self, request, queryset, view):
    """Restrict results to the request's device, when one is identified."""
    device_id = get_device_id(request)
    if not device_id:
        return queryset
    return queryset.filter(Bool(must=[F('term', device=device_id)]))
def filter_queryset(self, request, queryset, view):
    """Drop documents that list the request's region as an exclusion."""
    region = get_region_from_request(request)
    if not region:
        return queryset
    excluded_here = F('term', region_exclusions=region.id)
    return queryset.filter(Bool(must_not=[excluded_here]))
def filter_queryset(self, request, queryset, view):
    """Add tag facets for all filters and post-filter on the selected ones.

    Every serialized filter with tags gets a ``filter`` aggregation named
    after its slug. Filters whose slug is in ``view.selected_filters`` also
    contribute tag clauses to the post filter.
    """
    selected_clauses = []
    facets = []

    for entry in view.serialized_filters:
        tags = entry['tags']
        if not tags:
            # Incomplete filter has no tags, skip it
            continue

        if entry['slug'] in view.selected_filters:
            # User selected this filter - filter on the associated tags
            tag_terms = [F('term', tags=tag) for tag in tags]
            operator = Filter.OPERATORS[entry['operator']]
            if len(tag_terms) > 1 and operator == 'and':
                # Add an AND filter as a subclause
                selected_clauses.append(F('and', tag_terms))
            else:
                # Extend list of tags for the OR clause
                selected_clauses.extend(tag_terms)

        # Aggregate counts for active filters for sidebar
        if len(tags) > 1:
            facet = F('terms', tags=list(tags))
        else:
            facet = F('term', tags=tags[0])
        facets.append((entry['slug'], facet))

    # Count documents across all tags
    for slug, facet in facets:
        queryset.aggs.bucket(slug, 'filter', **facet.to_dict())

    # Filter by tag only after counting documents across all tags
    if not selected_clauses:
        return queryset
    if len(selected_clauses) == 1:
        return queryset.post_filter(selected_clauses[0])
    return queryset.post_filter(F('or', selected_clauses))
def fetch(session):
    """Return the p2p connect/disconnect events logged during ``session``.

    ``session`` must provide ``'start'`` and ``'stop'`` timestamps. Results
    are sorted by ascending ``@timestamp``.
    """
    s = Search(client)
    connection_events = [
        F('term', message='p2p.disconnected'),
        F('term', message='p2p.connected'),
    ]
    s = s.filter('bool', should=connection_events)

    time_window = dict(gte=session['start'], lte=session['stop'])
    s = s.filter('range', **{'@timestamp': time_window})

    s = s.fields([
        'json_message.p2p.connected.remote_id',
        'guid',
        'message',
        '@timestamp',
    ])
    s = s[0:100000]
    s = s.sort('@timestamp')
    return s.execute()
def filter_queryset(self, request, queryset, view):
    """Filter by the request's device and drop Flash apps where unsupported.

    Apps using Flash are excluded when the request is flagged as ``MOBILE``
    or ``GAIA``.
    """
    device_id = get_device_id(request)
    # Request flags default to False when the attribute is absent.
    flags = dict(
        (name, getattr(request, name.upper(), False))
        for name in ('gaia', 'mobile', 'tablet'))
    flash_incompatible = flags['mobile'] or flags['gaia']

    if device_id:
        device_clause = Bool(must=[F('term', device=device_id)])
        queryset = queryset.filter(device_clause)
    if flash_incompatible:
        flash_clause = Bool(must_not=[F('term', uses_flash=True)])
        queryset = queryset.filter(flash_clause)
    return queryset
def test_agg_filter_for_date_histograms():
    """A monthly date_histogram bucket maps to a one-month range filter."""
    monthly = A('date_histogram', field='published_date', interval='month')

    f = agg_to_filter(monthly, datetime(2014, 12, 1))

    one_month = {
        'gte': datetime(2014, 12, 1),
        'lt': datetime(2015, 1, 1),
    }
    assert f == F('range', published_date=one_month)
def test_complex_example():
    """A search combining queries, filters and aggs serializes to the
    expected request body."""
    s = search.Search()
    s = s.query('match', title='python')
    s = s.query(~Q('match', title='ruby'))
    s = s.filter(
        F('term', category='meetup') | F('term', category='conference'))
    s = s.post_filter('terms', tags=['prague', 'czech'])

    per_country = s.aggs.bucket('per_country', 'terms', field='country')
    per_country.metric('avg_attendees', 'avg', field='attendees')

    s.query.minimum_should_match = 2

    expected_filter = {
        'bool': {
            'should': [
                {'term': {'category': 'meetup'}},
                {'term': {'category': 'conference'}},
            ],
        },
    }
    expected_query = {
        'bool': {
            'must': [{'match': {'title': 'python'}}],
            'must_not': [{'match': {'title': 'ruby'}}],
            'minimum_should_match': 2,
        },
    }
    expected = {
        'query': {
            'filtered': {
                'filter': expected_filter,
                'query': expected_query,
            },
        },
        'post_filter': {
            'terms': {'tags': ['prague', 'czech']},
        },
        'aggs': {
            'per_country': {
                'terms': {'field': 'country'},
                'aggs': {
                    'avg_attendees': {'avg': {'field': 'attendees'}},
                },
            },
        },
    }
    assert expected == s.to_dict()
def get_referencing_items(item_url):
    """
    Return the URLs of all items that reference the given item URL.

    A URL starting with ``/`` is first made absolute. When the URL contains
    ``topic``, items whose related topic is that URL are matched as well.
    """
    if item_url.startswith('/'):
        item_url = make_dummy_request().build_absolute_uri(item_url)

    query_filter = F('term', **{'serialized.references': item_url})
    if 'topic' in item_url:
        related_topic = F('term', **{'serialized.related_topic.url': item_url})
        query_filter = query_filter | related_topic

    query = index.make_search()
    query = query.filter(query_filter)
    query = query.fields(['url'])

    urls = []
    for result in query.execute().hits:
        urls.append(result.url[0])
    return urls
def filter_queryset(self, request, queryset, view):
    """Exclude documents flagged with any feature named by the profile.

    The feature field names are the keys of
    ``profile.to_kwargs(prefix='features.has_')``. Without a profile, or
    with no such keys, the queryset is returned unchanged.
    """
    profile = get_feature_profile(request)
    if not profile:
        return queryset

    feature_fields = profile.to_kwargs(prefix='features.has_').keys()
    must_not = [F('term', **{name: True}) for name in feature_fields]
    if not must_not:
        return queryset
    return queryset.filter(Bool(must_not=must_not))
def filter_by_apps(cls, app_ids, queryset=None):
    """
    Filter the given queryset down to the given app IDs.

    Falls back to ``cls.search()`` when no queryset is supplied. The IDs
    are de-duplicated, matched with a `should` filter (equivalent to an
    "OR"), and the result is sliced to one slot per unique ID.
    """
    queryset = queryset or cls.search()
    unique_ids = list(set(app_ids))  # De-dupe.
    id_clause = Bool(should=[F('terms', id=unique_ids)])
    queryset = queryset.filter(id_clause)
    return queryset[0:len(unique_ids)]
def _add_unique_id_filters(form_model, unique_id_filters, search): if unique_id_filters: for uniqueIdType, uniqueIdFilter in unique_id_filters.iteritems(): if uniqueIdFilter: unique_id_filters = [] for question in [ question for question in form_model.entity_questions if question.unique_id_type == uniqueIdType ]: es_field_code = es_unique_id_code_field_name( es_questionnaire_field_name( question.code, form_model.id, parent_field_code=question.parent_field_code) ) + "_exact" unique_id_filters.append( F("term", **{es_field_code: uniqueIdFilter})) search = search.filter(F('or', unique_id_filters)) return search
def get_user(user_id):
    """Return the raw hits for the indexed document of user ``user_id``.

    The lookup uses an ``ids`` filter on the default Haystack index.
    """
    client = Elasticsearch(settings.ELASTICSEARCH_URL)
    index_name = settings.HAYSTACK_CONNECTIONS['default']['INDEX_NAME']
    document_id = 'members.facebookcustomuseractive.%s' % user_id

    s = Search(using=client, index=index_name)
    s = s.filter(F("ids", type="modelresult", values=[document_id]))
    return s.execute().hits.hits
def test_filter_can_be_overriden():
    """Assigning to ``Search.filter`` replaces the existing filter."""
    s = search.Search().filter('term', tag='python')
    s.filter = ~F(s.filter)

    negated_tag = {"bool": {"must_not": [{"term": {"tag": "python"}}]}}
    expected = {
        "query": {
            "filtered": {
                "query": {"match_all": {}},
                "filter": negated_tag,
            },
        },
    }
    assert expected == s.to_dict()
def filter_queryset(self, request, queryset, view):
    """Post-filter on the selected tag filters and add per-filter facets.

    For every serialized filter that has tags, a ``filter`` aggregation
    named after its slug is added to ``queryset.aggs``. Filters whose slug
    is in ``view.selected_filters`` are also combined (OR-ed when there is
    more than one) into the post filter.

    Filters without tags are skipped. Previously a selected filter with an
    empty tag list raised IndexError on ``filter_tags[0]``.

    Raises KeyError when a filter with tags has an operator that is not in
    ``Filter.OPERATORS``.
    """
    active_filters = []
    active_facets = []

    for serialized_filter in view.serialized_filters:
        filter_tags = serialized_filter['tags']
        if not filter_tags:
            # Incomplete filter: nothing to filter or count on.
            continue

        filter_operator = Filter.OPERATORS[serialized_filter['operator']]

        if serialized_filter['slug'] in view.selected_filters:
            if len(filter_tags) > 1:
                # Several tags are combined with the filter's own operator.
                tag_filters = [F('term', tags=tag) for tag in filter_tags]
                active_filters.append(F(filter_operator, tag_filters))
            else:
                active_filters.append(F('term', tags=filter_tags[0]))

        if len(filter_tags) > 1:
            facet_params = F('terms', tags=list(filter_tags))
        else:
            facet_params = F('term', tags=filter_tags[0])
        active_facets.append((serialized_filter['slug'], facet_params))

    if active_filters:
        if len(active_filters) == 1:
            queryset = queryset.post_filter(active_filters[0])
        else:
            queryset = queryset.post_filter(F('or', active_filters))

    for facet_slug, facet_params in active_facets:
        queryset.aggs.bucket(facet_slug, 'filter', **facet_params.to_dict())

    return queryset
def assert_mining(minmining, offset=300):
    """
    Assert that at least `minmining` clients have started mining and mined
    a block.

    New-block log messages inside the time range given by `offset` are
    counted per host, and the per-host counts are printed.
    """
    s = Search(client)
    s = s.filter(F('term', message='eth.miner.new_block'))
    s = s.filter(time_range_filter(offset=offset))
    s.aggs.bucket('by_host', 'terms', field='syslog_hostname.raw', size=0)
    response = s.execute()

    print("passed for: ")
    for bucket in response.aggregations.by_host.buckets:
        print(' %s, blocks mined: %d' % (bucket.key, bucket.doc_count))

    # One bucket per host that mined at least one block.
    num_mining = len(response.aggregations.by_host.buckets)
    assert num_mining >= minmining, (
        'only %d clients mining, expected at least %d' % (
            num_mining, minmining))
def filter_queryset(self, request, queryset, view):
    """Post-filter on the selected tag filters and attach legacy facets.

    For every serialized filter that has tags, a facet named after its slug
    is added through ``queryset.extra(facets=...)``. Filters whose slug is
    in ``view.selected_filters`` are also combined (OR-ed when there is
    more than one) into the post filter.

    Filters without tags are skipped. Previously any filter with an empty
    tag list raised IndexError on ``filter_tags[0]``.

    Raises KeyError when a filter with tags has an operator that is not in
    ``Filter.OPERATORS``.
    """
    active_filters = []
    active_facets = []

    for serialized_filter in view.serialized_filters:
        filter_tags = serialized_filter['tags']
        if not filter_tags:
            # Incomplete filter: nothing to filter or count on.
            continue

        filter_operator = Filter.OPERATORS[serialized_filter['operator']]

        if serialized_filter['slug'] in view.selected_filters:
            if len(filter_tags) > 1:
                # Several tags are combined with the filter's own operator.
                tag_filters = [F('term', tags=tag) for tag in filter_tags]
                active_filters.append(F(filter_operator, tag_filters))
            else:
                active_filters.append(F('term', tags=filter_tags[0]))

        if len(filter_tags) > 1:
            facet_params = F('terms', tags=filter_tags)
        else:
            facet_params = F('term', tags=filter_tags[0])
        active_facets.append((serialized_filter['slug'], facet_params))

    # Keep a handle on the search as it was before the post filter.
    unfiltered_queryset = queryset
    if active_filters:
        if len(active_filters) == 1:
            queryset = queryset.post_filter(active_filters[0])
        else:
            queryset = queryset.post_filter(F('or', active_filters))

    # only way to get to the currently applied filters
    # to use it to limit the facets filters below
    facet_filter = unfiltered_queryset.to_dict().get('filter', [])

    # TODO: Convert to use aggregations.
    facets = {}
    for facet_slug, facet_params in active_facets:
        facets[facet_slug] = {
            'filter': facet_params.to_dict(),
            'facet_filter': facet_filter,
        }
    queryset = queryset.extra(facets=facets)
    return queryset
def get_queryset(self):
    """Return the base multi-search queryset restricted to the TV device."""
    base_queryset = BaseMultiSearchView.get_queryset(self)
    tv_only = Bool(must=[F('term', device=mkt.DEVICE_TV.id)])
    return base_queryset.filter(tv_only)
def get(self, **kwargs): """Return a list of results and aggregations based on parameters. The list of accepted parameters (with types and default values) is in the database and can be accessed with the super_search_fields service. """ # Require that the list of fields be passed. if not kwargs.get('_fields'): raise MissingArgumentError('_fields') self.all_fields = kwargs['_fields'] self._build_fields() # Filter parameters and raise potential errors. params = self.get_parameters(**kwargs) # Find the indices to use to optimize the elasticsearch query. indices = self.get_indices(params['date']) # Create and configure the search object. search = Search( using=self.get_connection(), index=indices, doc_type=self.config.elasticsearch.elasticsearch_doctype, ) # Create filters. filters = [] histogram_intervals = {} for field, sub_params in params.items(): sub_filters = None for param in sub_params: if param.name.startswith('_'): # By default, all param values are turned into lists, # even when they have and can have only one value. # For those we know there can only be one value, # so we just extract it from the made-up list. if param.name == '_results_offset': results_from = param.value[0] elif param.name == '_results_number': results_number = param.value[0] if results_number > 1000: raise BadArgumentError('_results_number too large') elif param.name == '_facets_size': facets_size = param.value[0] for f in self.histogram_fields: if param.name == '_histogram_interval.%s' % f: histogram_intervals[f] = param.value[0] # Don't use meta parameters in the query. 
continue field_data = self.all_fields[param.name] name = '%s.%s' % (field_data['namespace'], field_data['in_database_name']) if param.data_type in ('date', 'datetime'): param.value = datetimeutil.date_to_string(param.value) elif param.data_type == 'enum': param.value = [x.lower() for x in param.value] elif param.data_type == 'str' and not param.operator: param.value = [x.lower() for x in param.value] # Operators needing wildcards, and the associated value # transformation with said wildcards. operator_wildcards = { '~': '*%s*', # contains '$': '%s*', # starts with '^': '*%s' # ends with } # Operators needing ranges, and the associated Elasticsearch # comparison operator. operator_range = { '>': 'gt', '<': 'lt', '>=': 'gte', '<=': 'lte', } args = {} filter_type = 'term' filter_value = None if not param.operator: # contains one of the terms if len(param.value) == 1: val = param.value[0] if not isinstance(val, basestring) or ' ' not in val: # There's only one term and no white space, this # is a simple term filter. filter_value = val else: # If the term contains white spaces, we want to # perform a phrase query. filter_type = 'query' args = Q( 'simple_query_string', query=param.value[0], fields=[name], default_operator='and', ).to_dict() else: # There are several terms, this is a terms filter. filter_type = 'terms' filter_value = param.value elif param.operator == '=': # is exactly if field_data['has_full_version']: name = '%s.full' % name filter_value = param.value elif param.operator in operator_range: filter_type = 'range' filter_value = { operator_range[param.operator]: param.value } elif param.operator == '__null__': filter_type = 'missing' args['field'] = name elif param.operator in operator_wildcards: filter_type = 'query' # Wildcard operations are better applied to a non-analyzed # field (called "full") if there is one. 
if field_data['has_full_version']: name = '%s.full' % name q_args = {} q_args[name] = (operator_wildcards[param.operator] % param.value) query = Q('wildcard', **q_args) args = query.to_dict() if filter_value is not None: args[name] = filter_value if args: new_filter = F(filter_type, **args) if param.operator_not: new_filter = ~new_filter if sub_filters is None: sub_filters = new_filter elif filter_type == 'range': sub_filters &= new_filter else: sub_filters |= new_filter continue if sub_filters is not None: filters.append(sub_filters) search = search.filter(F('bool', must=filters)) # Restricting returned fields. fields = [] for param in params['_columns']: for value in param.value: if not value: continue field_name = self.get_field_name(value, full=False) fields.append(field_name) search = search.fields(fields) # Sorting. sort_fields = [] for param in params['_sort']: for value in param.value: if not value: continue # Values starting with a '-' are sorted in descending order. # In order to retrieve the database name of the field, we # must first remove the '-' part and add it back later. # Example: given ['product', '-version'], the results will be # sorted by ascending product and descending version. desc = False if value.startswith('-'): desc = True value = value[1:] field_name = self.get_field_name(value, full=False) if desc: # The underlying library understands that '-' means # sorting in descending order. field_name = '-' + field_name sort_fields.append(field_name) search = search.sort(*sort_fields) # Pagination. results_to = results_from + results_number search = search[results_from:results_to] # Create facets. for param in params['_facets']: self._add_second_level_aggs( param, search.aggs, facets_size, histogram_intervals, ) # Create sub-aggregations. 
for key in params: if not key.startswith('_aggs.'): continue fields = key.split('.')[1:] if fields[0] not in self.all_fields: continue base_bucket = self._get_fields_agg(fields[0], facets_size) sub_bucket = base_bucket for field in fields[1:]: # For each field, make a bucket, then include that bucket in # the latest one, and then make that new bucket the latest. if field in self.all_fields: tmp_bucket = self._get_fields_agg(field, facets_size) sub_bucket.bucket(field, tmp_bucket) sub_bucket = tmp_bucket for value in params[key]: self._add_second_level_aggs( value, sub_bucket, facets_size, histogram_intervals, ) search.aggs.bucket(fields[0], base_bucket) # Create histograms. for f in self.histogram_fields: key = '_histogram.%s' % f if params.get(key): histogram_bucket = self._get_histogram_agg( f, histogram_intervals) for param in params[key]: self._add_second_level_aggs( param, histogram_bucket, facets_size, histogram_intervals, ) search.aggs.bucket('histogram_%s' % f, histogram_bucket) # Query and compute results. hits = [] if params['_return_query'][0].value[0]: # Return only the JSON query that would be sent to elasticsearch. return { 'query': search.to_dict(), 'indices': indices, } # We call elasticsearch with a computed list of indices, based on # the date range. However, if that list contains indices that do not # exist in elasticsearch, an error will be raised. We thus want to # remove all failing indices until we either have a valid list, or # an empty list in which case we return no result. while True: try: results = search.execute() for hit in results: hits.append(self.format_fields(hit.to_dict())) total = search.count() aggregations = self.format_aggregations(results.aggregations) break # Yay! Results! except NotFoundError, e: missing_index = re.findall(BAD_INDEX_REGEX, e.error)[0] if missing_index in indices: del indices[indices.index(missing_index)] else: # Wait what? An error caused by an index that was not # in the request? 
That should never happen, but in case # it does, better know it. raise if indices: # Update the list of indices and try again. # Note: we need to first empty the list of indices before # updating it, otherwise the removed indices never get # actually removed. search = search.index().index(*indices) else: # There is no index left in the list, return an empty # result. hits = [] total = 0 aggregations = {} break