def organisation_search_factory(self, search, query_parser=None):
    """Search factory with view code parameter.

    Exclude masked records from public search.
    """
    # TODO: this is a temporary implementation of the masked holdings
    # records. This functionality will be completed after merging the USs:
    #   US1909: Performance: many items on public document detailed view
    #   US1906: Complete item model
    if current_librarian:
        search = search.filter(
            'term', organisation__pid=current_librarian.organisation_pid)
    view = request.args.get(
        'view', current_app.config.get('RERO_ILS_SEARCH_GLOBAL_VIEW_CODE'))
    search, urlkwargs = search_factory(self, search)
    if view != current_app.config.get('RERO_ILS_SEARCH_GLOBAL_VIEW_CODE'):
        search = search.filter('bool', must_not=[Q('term', _masked=True)])
    return search, urlkwargs
def create_search(
    must: list = None,
    should: list = None,
    filter_: list = None,
    must_not: list = None,
    source: dict = None,
    sort=None,
) -> Search:
    """Build a Search on the index from bool query clauses.

    Kwargs:
        must: list of queries that must be satisfied
        should: list of queries that should be satisfied
        filter_: list of queries applied in filter context
        must_not: list of queries that must not match
        source: source filtering options
        sort: sort statement

    Return:
        Search object.
    """
    s = Search(index=INDEX)
    match_all = Q("match_all")
    must = must + [match_all] if must else [match_all]
    should = should if should else []
    filter_ = filter_ if filter_ else []
    must_not = must_not if must_not else []
    s = s.query("bool", must=must, should=should, filter=filter_,
                must_not=must_not)
    if sort:
        s = s.sort(sort)
    if source:
        s = s.source(**source)
    print(f"Query: {json.dumps(s.to_dict())}")
    return s
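# A minimal usage sketch for create_search. The field names ("title",
# "status", "created_at") are hypothetical; it assumes INDEX is defined and
# a default elasticsearch_dsl connection is configured.
def example_create_search_usage():
    s = create_search(
        must=[Q("match", title="python")],
        filter_=[Q("term", status="published")],
        sort="-created_at",
        source={"includes": ["title", "created_at"]},
    )
    return s.execute()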
def similar_declarations(self, limit=12, return_full_body=False):
    fields = [
        "general.last_name",
        "general.name",
        "general.patronymic",
        "general.full_name",
    ]
    s = (
        Search(index=CATALOG_INDICES)
        .query(
            "multi_match",
            query=self.general.full_name,
            operator="and",
            fields=fields,
        )
        # Exclude the current declaration from the matches.
        .query(~Q("term", _id=self.meta.id))
    )
    if return_full_body:
        s = s.doc_type(NACPDeclaration, Declaration)
    return s[:limit].execute()
def and_search_factory(self, search, query_parser=None):
    """Parse query using the elasticsearch DSL query.

    :param self: REST view.
    :param search: Elasticsearch DSL search instance.
    :returns: Tuple with search instance and URL arguments.
    """
    def _default_parser(qstr=None):
        """Default parser that uses the Q() from elasticsearch_dsl."""
        if qstr:
            return Q('query_string', query=qstr, default_operator='AND')
        return Q()

    from invenio_records_rest.facets import default_facets_factory
    from invenio_records_rest.sorter import default_sorter_factory

    query_string = request.values.get('q')
    query_parser = query_parser or _default_parser
    try:
        search = search.query(query_parser(query_string))
    except SyntaxError:
        current_app.logger.debug(
            f'Failed parsing query: {request.values.get("q", "")}',
            exc_info=True,
        )
        raise InvalidQueryRESTError()

    search_index = search._index[0]
    search, urlkwargs = default_facets_factory(search, search_index)
    search, sortkwargs = default_sorter_factory(search, search_index)
    for key, value in sortkwargs.items():
        urlkwargs.add(key, value)
    urlkwargs.add('q', query_string)

    # Exclude deleted records unless they are explicitly requested.
    deleted = request.args.get('deleted')
    if not deleted:
        search = search.filter('bool', must_not=[Q('exists', field='deleted')])
    return search, urlkwargs
def index_or_update_project(self, uuid):
    """
    Takes a project UUID and either creates a new document in the
    des-projects index or updates the document if one already exists for
    that project.
    """
    from designsafe.apps.api.projects.models import Project
    client = get_service_account_client()
    project_model = Project(client)
    project = project_model.search({'uuid': uuid}, client)[0]
    project_meta = project.to_dict()
    to_index = {key: value for key, value in project_meta.items()
                if key != '_links'}
    to_index['value'] = {key: value
                         for key, value in project_meta['value'].items()
                         if key != 'teamMember'}

    # Normalize awardNumber to a list of {'number': ...} dicts.
    if not isinstance(to_index['value'].get('awardNumber', []), list):
        to_index['value']['awardNumber'] = [
            {'number': to_index['value']['awardNumber']}]
    if to_index['value'].get('guestMembers', []) == [None]:
        to_index['value']['guestMembers'] = []

    project_search = IndexedProject.search().filter(
        Q({'term': {'uuid._exact': uuid}}))
    res = project_search.execute()

    if res.hits.total == 0:
        # Create an ES record for the new metadata.
        project_ES = IndexedProject(**to_index)
        project_ES.save()
    elif res.hits.total == 1:
        # Update the existing record.
        doc = res[0]
        doc.update(**to_index)
    else:
        # If we're here we've somehow indexed the same project multiple
        # times. Delete all records and replace them with the metadata
        # passed to the task.
        for doc in res:
            doc.delete()
        project_ES = IndexedProject(**to_index)
        project_ES.save()
def date(data):
    try:
        query_date = parse(data['field_value'].strip()).strftime('%Y-%m-%d')
        query_object = {data['range_type']: query_date}
        # A range_type of 'value' means an exact match; anything else is a
        # range bound such as 'gte' or 'lte'.
        query = 'term' if data['range_type'] == 'value' else 'range'
        query = Q(query, **{data['field']: query_object})
        highlight = ({data['field']: data['highlight']}
                     if data.get('highlight', False) else False)
        return query, highlight, False
    except ValueError:
        errors = [
            'Value "{}" could not be parsed into a date for field "{}". '
            'Try reformatting it, for example as "DD-MM-YYYY"'.format(
                data['field_value'], data['field'])
        ]
        return False, False, errors
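# A hedged usage sketch for date(): the input dict shape is inferred from
# the function body, and the field name "created" is hypothetical.
def example_date_usage():
    query, highlight, errors = date({
        'field': 'created',
        'field_value': '26-12-2018',
        'range_type': 'gte',  # any value other than 'value' builds a range
        'highlight': {'fragment_size': 150},
    })
    # query is now Q('range', created={'gte': '2018-12-26'})
    return query, highlight, errors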
def list_all_courses(term):
    """Return the ids of all courses indexed for the given term."""
    if term == 'current':
        index = utils.get_current_course_index()
    else:
        index = ES_COURSE_INDEX_PREFIX + term
    # Match everything; only the document ids are needed, so skip _source.
    query = Q()
    s = Search(index=index).query(query).extra(size=7000).source(False)
    try:
        response = s.execute().to_dict()
        if "hits" in response:
            courseids = [elem['_id'] for elem in response['hits']['hits']]
            return courseids
    except Exception:
        pass
    return []
def create_geo_point(self, options):
    '''
    Given the options for a geo_point term, creates and returns an
    elasticsearch-dsl object to represent it. This term maps directly to an
    elasticsearch geo_distance query. If only one field is present in the
    fields property then the term query is returned directly, otherwise an
    or query is returned across all the fields requested.

    :param options: the options for the geo_point query
    :return: an elasticsearch-dsl Query object or a Bool object
    '''
    return Q(
        u'geo_distance',
        **{
            u'distance': u'{}{}'.format(options.get(u'radius', 0),
                                        options.get(u'radius_unit', u'm')),
            u'meta.geo': {
                u'lat': options[u'latitude'],
                u'lon': options[u'longitude'],
            }
        })
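# A hedged usage sketch for create_geo_point: the option keys mirror the
# ones the function reads, and the coordinates are made up. The result is
# roughly Q('geo_distance', distance='10km',
#           **{'meta.geo': {'lat': ..., 'lon': ...}}).
example_geo_point_options = {
    u'latitude': 51.4778,
    u'longitude': -0.0015,
    u'radius': 10,
    u'radius_unit': u'km',
}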
def rdm_records_filter():
    """Records filter."""
    # TODO: Implement with new permissions metadata
    try:
        perm_factory = current_app.config["RECORDS_REST_ENDPOINTS"]["recid"][
            "read_permission_factory_imp"]()  # noqa
    except KeyError:
        perm_factory = record_read_permission_factory
    # FIXME: this might fail if the factory returns None, meaning no
    # "query_filter" was implemented in the generators. However, IfPublic
    # should always be there.
    filters = perm_factory.query_filters
    if filters:
        qf = None
        for f in filters:
            # OR the query filters from all generators together.
            qf = qf | f if qf else f
        return qf
    return Q()
def get_document_text_slice(self, slice_count=0, slice_size=1000,
                            slice_id=0):
    s = Search(using=self.es, index=self.index, doc_type='items') \
        .query(Q({"match_all": {}})) \
        .params(scroll='5m', size=slice_size)
    # Restrict this worker to its own slice of the sliced scroll.
    s = s.extra(slice={'id': slice_id, 'max': slice_count})
    response = s.execute()
    for document in response:
        if 'itemText' in document:
            yield document.meta.id, document['itemText']
        else:
            yield document.meta.id, ''
def query_filter(self, **kwargs):
    """Filters for current identity as owner."""
    # Contains logged-in user information.
    provides = g.identity.provides
    # Specify which restriction will be applied (owners).
    matches = {"applied_restrictions": "owners"}
    # Get the user id.
    for need in provides:
        if need.method == "id":
            matches["_owners"] = need.value
            break
    # Query Elasticsearch: both user id and applied_restrictions
    # need to match.
    queries = [
        Q("match", **{match: f"{matches[match]}"}) for match in matches
    ]
    return reduce(operator.and_, queries)
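# For illustration (hypothetical values): with a logged-in user whose id
# need is 42, query_filter reduces to the equivalent of
#   Q("match", applied_restrictions="owners") & Q("match", _owners="42")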
def bollinger_band(index='cf_etf_hist_price', start_date='2018-12-26',
                   end_date='2019-03-25', symbol='rfem'):
    ESLowLevelClientByConnection.get_instance()
    search = Search(index=index, using='high_level_client')[0:0]
    search.query = Q(
        Bool(must=[
            Range(date={'gte': start_date, 'lte': end_date}),
            Term(symbol=symbol)
        ]))
    # Daily buckets over the date range.
    aggs = A(
        DateHistogram(field='date', interval='1d', format='yyyy-MM-dd',
                      min_doc_count=1))
    # Typical price per day: (high + low + close) / 3.
    aggs_tp = A(
        ScriptedMetric(
            init_script='state.totals=[]',
            map_script='state.totals.add('
                       '(doc.high.value+doc.low.value+doc.close.value)/3)',
            combine_script='double total=0; '
                           'for (t in state.totals) {total += t} return total',
            reduce_script='double total=0; '
                          'for (t in states) {total += t} return total'))
    # 20-day simple moving average of the typical price.
    aggs_moving_avg = A(
        MovingAvg(model='simple', window=20, buckets_path='tp.value'))
    # Upper and lower bands as offsets from the moving average.
    aggs_bbu = A(
        BucketScript(buckets_path={'SMA': '20_trading_days_moving_avg'},
                     script='params.SMA + 0.5'))
    aggs_bbl = A(
        BucketScript(buckets_path={'SMA': '20_trading_days_moving_avg'},
                     script='params.SMA - 0.5'))
    search.aggs.bucket('Bollinger_band', aggs) \
        .metric('tp', aggs_tp) \
        .pipeline('20_trading_days_moving_avg', aggs_moving_avg) \
        .pipeline('BBU', aggs_bbu) \
        .pipeline('BBL', aggs_bbl)
    response = search.execute()
    print(response.to_dict())
def aggregate(self, queryset,
              agg_field_name_getter=default_agg_field_name_getter,
              agg_bucket_name_getter=default_agg_bucket_name_getter):
    """Aggregate.

    :param queryset:
    :param agg_field_name_getter: callable.
    :param agg_bucket_name_getter: callable.
    :return:
    """
    _facets = self.construct_facets()
    for _field, _facet in _facets.items():
        agg = _facet['facet'].get_aggregation()
        agg_filter = Q('match_all')
        # TODO: Implement filtering by the other active filters:
        # for __filter_field, __filter in self._filters.items():
        #     if _field == __filter_field:
        #         continue
        #     agg_filter &= __filter
        if _facet['global']:
            queryset.aggs.bucket(
                agg_field_name_getter(_field),
                'global'
            ).bucket(
                agg_bucket_name_getter(_field),
                agg
            )
        else:
            queryset.aggs.bucket(
                agg_field_name_getter(_field),
                'filter',
                filter=agg_filter
            ).bucket(
                agg_bucket_name_getter(_field),
                agg
            )
    return queryset
def apply_query(cls, queryset, options=None, args=None, kwargs=None):
    """Apply query.

    :param queryset:
    :param options:
    :param args:
    :param kwargs:
    :return:
    """
    if options is None:
        raise ImproperlyConfigured(
            "You should provide a `path` argument in the field options.")
    path = options.pop('path')
    if args is None:
        args = []
    if kwargs is None:
        kwargs = {}
    return queryset.query('nested', path=path, query=Q(*args, **kwargs))
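# A hedged usage sketch for apply_query: the path and field names are
# hypothetical. It wraps an arbitrary Q(...) in a `nested` query under the
# path popped from the options.
def example_apply_query_usage(cls, queryset):
    return cls.apply_query(
        queryset=queryset,
        options={'path': 'comments'},
        args=['term'],
        kwargs={'comments__author': 'alice'},
    )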
def apply_query_lte(cls, queryset, options, value):
    """Apply `lte` functional query.

    :param queryset: Original queryset.
    :param options: Filter options.
    :param value: value to filter on.
    :type queryset: elasticsearch_dsl.search.Search
    :type options: dict
    :type value: str
    :return: Modified queryset.
    :rtype: elasticsearch_dsl.search.Search
    """
    path = options.pop('path')
    return queryset.query(
        'nested',
        path=path,
        query=Q(
            'range',
            **{options['field']: cls.get_gte_lte_params(value, 'lte')}
        )
    )
def from_id(cls, project_id):
    """Return the indexed document matching the given project id."""
    if project_id is None:
        raise DocumentNotFound()
    search = cls.search().query(
        Q({"term": {"projectId._exact": project_id}}))
    res = search.execute()
    if res.hits.total > 1:
        # Multiple documents were indexed for the same project id:
        # keep the first and delete the duplicates.
        for doc in res[1:res.hits.total]:
            doc.delete()
        return res[0]
    elif res.hits.total == 1:
        return res[0]
    else:
        raise DocumentNotFound(
            "No document found for {}".format(project_id))
def ill_request_search_factory(self, search, query_parser=None):
    """ILL request search factory.

    Restricts results to organisation level for librarian and sys_lib.
    Restricts results to their own requests for users with role patron.
    Excludes to_anonymize loans from results.
    """
    search, urlkwargs = search_factory(self, search)
    if current_librarian:
        search = search.filter(
            'term', organisation__pid=current_librarian.organisation_pid
        )
    elif current_patrons:
        search = search.filter(
            'terms', patron__pid=[ptrn.pid for ptrn in current_patrons])
    # Exclude to_anonymize records.
    search = search.filter('bool', must_not=[Q('term', to_anonymize=True)])
    return search, urlkwargs
def filter_search(self, search):
    """Filter the given search by the filter parameters in the request.

    :param search: Elasticsearch query object
    """
    for field in self.filtering_fields:
        value = self.get_query_param(field, None)
        if value:
            custom_filter = getattr(
                self, 'custom_filter_{}'.format(field), None)
            if custom_filter is not None:
                search = custom_filter(value, search)
            elif isinstance(value, list):
                # Default is 'should' between matches. If you need anything
                # else, implement a custom filter for this field.
                filters = [Q('match', **{field: item}) for item in value]
                search = search.query('bool', should=filters)
            else:
                search = search.query('match', **{field: value})
    return search
def apply_query_exclude(cls, queryset, options, value):
    """Apply `exclude` functional query.

    Syntax:

        /endpoint/?field_name__exclude={value1}__{value2}
        /endpoint/?field_name__exclude={value1}

    Note, that the number of values is not limited.

    Example:

        http://localhost:8000/api/articles/?tags__exclude=children__python
        http://localhost:8000/api/articles/?tags__exclude=children

    :param queryset: Original queryset.
    :param options: Filter options.
    :param value: value to filter on.
    :type queryset: elasticsearch_dsl.search.Search
    :type options: dict
    :type value: str
    :return: Modified queryset.
    :rtype: elasticsearch_dsl.search.Search
    """
    __values = cls.split_lookup_complex_value(value)
    __queries = []
    for __value in __values:
        __queries.append(
            ~Q('term', **{options['field']: __value})
        )
    if __queries:
        queryset = cls.apply_query(
            queryset=queryset,
            options=options,
            args=[six.moves.reduce(operator.or_, __queries)]
        )
    return queryset
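# For illustration: ?tags__exclude=children__python is split by
# split_lookup_complex_value into ['children', 'python'] and reduced to
#   ~Q('term', tags='children') | ~Q('term', tags='python')
# which apply_query then wraps in a nested query under options['path'].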
def more_like_this(elastic_url, fields: list, like: list, size: int,
                   filters: list, aggregations: list, include: bool,
                   if_agg_only: bool, dataset: Dataset, return_fields=None):
    # Create the base query and connect it to the ES gateway.
    search = Search(using=Elasticsearch(elastic_url)) \
        .index(dataset.index).doc_type(dataset.mapping)
    # Prepare the MLT part of the query.
    mlt = MoreLikeThis(like=like, fields=fields, min_term_freq=1,
                       max_query_terms=12, include=include)

    paginated_search = search[0:size]  # Set how many documents to return.
    # If requested, choose which fields to return.
    limited_search = (paginated_search.source(return_fields)
                      if return_fields else paginated_search)
    # Add the premade MLT into the query.
    finished_search = limited_search.query(mlt)

    # Apply all the user-set filters; if they didn't add any, this list
    # is [] and the loop is a no-op.
    for filter_dict in filters:
        finished_search = finished_search.filter(Q(filter_dict))

    # Apply all the user-set aggregations; if they didn't add any, this
    # list is [] and the loop is a no-op.
    for aggregation_dict in aggregations:
        # aggs.bucket() does not return a Search object but changes it
        # in place instead.
        if aggregation_dict["agg_type"] == "composite":
            after = aggregation_dict.get("after_key", None)
            finished_search = ES_Manager.handle_composition_aggregation(
                finished_search.to_dict(), aggregation_dict, after)
        else:
            field_name = aggregation_dict["field"]
            index = like[0]["_index"]
            field = ("{}.keyword".format(field_name)
                     if ES_Manager.is_field_text_field(
                         field_name=field_name, index_name=index)
                     else field_name)
            finished_search.aggs.bucket(
                name=aggregation_dict["bucket_name"],
                agg_type=aggregation_dict["agg_type"],
                field=field)

    # Optionally return only the aggregations in
    # {"bucket_name": {results...}} format.
    if if_agg_only:
        finished_search = finished_search.params(size=0)
        response = finished_search.execute()
        return response.aggs.to_dict()

    try:
        response = finished_search.execute()
        # Throw out all metadata and keep only the documents.
        result = {"hits": [hit.to_dict() for hit in response]}
        if response.aggs:
            # If the aggregation query returned anything, add the
            # "aggregations" key with the results.
            result.update({"aggregations": response.aggs.to_dict()})
        return result
    except ElasticsearchException as e:
        logging.getLogger(ERROR_LOGGER).exception(e)
        return {"elasticsearch": [str(e)]}
def apply_query_in(cls, queryset, options, value):
    """Apply `in` functional query.

    :param queryset: Original queryset.
    :param options: Filter options.
    :param value: value to filter on.
    :type queryset: elasticsearch_dsl.search.Search
    :type options: dict
    :type value: str
    :return: Modified queryset.
    :rtype: elasticsearch_dsl.search.Search
    """
    __values = cls.split_lookup_value(value)
    __queries = []
    for __value in __values:
        __queries.append(Q('term', **{options['field']: __value}))
    if __queries:
        queryset = queryset.query(
            six.moves.reduce(operator.or_, __queries))
    return queryset
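# For illustration: ?field__in=a__b is split by split_lookup_value into
# ['a', 'b'] and applied as
#   Q('term', field='a') | Q('term', field='b')
# directly on the queryset (unlike apply_query_exclude above, which goes
# through cls.apply_query and its nested wrapping).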
def get_searchqueryset(self):
    query = self.request.GET.get('q', '')
    sqs = SearchQuerySetWrapper(PublicBodyDocument.search(), PublicBody)

    if len(query) > 2:
        sqs = sqs.set_query(
            Q("multi_match", query=query, fields=['name_auto', 'content']))

    model_filters = {
        'jurisdiction': Jurisdiction,
        'classification': Classification,
        'categories': Category,
        'regions': GeoRegion,
    }
    for key, model in model_filters.items():
        pks = self.request.GET.getlist(key)
        if pks:
            try:
                obj = model.objects.filter(pk__in=pks)
                sqs = sqs.filter(**{key: [o.pk for o in obj]})
            except ValueError:
                # Make result set empty, no 0 pk present
                sqs = sqs.filter(**{key: 0})

    other_filters = {'regions_kind': 'regions_kind'}
    for key, search_key in other_filters.items():
        values = self.request.GET.getlist(key)
        if values:
            sqs = sqs.filter(**{search_key: values})

    sqs = sqs.add_aggregation([
        'jurisdiction',
        'classification',
        'categories',
        'regions',
    ])
    return sqs
def getScansFromES(scantype=None, domain=None, request=None):
    es = Elasticsearch([os.environ['ESURI']])
    dates = getdates()
    latestindex = dates[1] + '-*'
    indices = list(es.indices.get_alias(latestindex).keys())
    y, m, d, scantypes = zip(*(s.split("-") for s in indices))

    if scantype is not None:
        if scantype not in scantypes:
            # If we requested a scantype that does not exist, return an
            # empty query.
            s = Search(using=es, index=latestindex)
            s = s.query(~Q('match_all'))
        else:
            index = dates[1] + '-' + scantype
            s = Search(using=es, index=index)
    else:
        # Fall through to a domain query across all indices.
        s = Search(using=es, index=latestindex)
    s = s.sort('domain')

    # Filter by domain if we have one.
    if domain is not None:
        s = s.filter("term", domain=domain)

    # Make the API url pretty.
    if request is not None:
        apiurl = request.scheme + '://' + request.get_host() + '/api/v1/scans/'

    # Generate the list of scans, attaching the pretty API url to each.
    scans = []
    for i in s.scan():
        if request is not None:
            i['scan_data_url'] = (apiurl + i['scantype'] + '/'
                                  + i['domain'] + '/')
        scans.append(i.to_dict())
    return scans
def aggregate(self, request, queryset, view):
    """Aggregate.

    :param request:
    :param queryset:
    :param view:
    :return:
    """
    __facets = self.construct_facets(request, view)
    for __field, __facet in iteritems(__facets):
        agg = __facet.get_aggregation()
        agg_filter = Q('match_all')
        # TODO: Implement filtering by the other active filters:
        # for __filter_field, __filter in iteritems(self._filters):
        #     if __field == __filter_field:
        #         continue
        #     agg_filter &= __filter
        queryset.aggs.bucket(
            '_filter_' + __field,
            'filter',
            filter=agg_filter
        ).bucket(__field, agg)
    return queryset
def inner(values):
    range_query = {
        "Overdue": {
            "lt": str(arrow.utcnow().date())
        },
        "Upcoming return": {
            "lte": str(current_app.config["CIRCULATION_POLICIES"]
                       ["upcoming_return_range"]().date()),
            "gte": str(arrow.utcnow().date())
        }
    }
    args = {}
    for range_key, mappings in range_query.items():
        if range_key in values:
            for key, value in mappings.items():
                args[key] = value
    return Range(**{field: args}) & Q(
        'terms',
        **{'state': current_app.config["CIRCULATION_STATES_LOAN_ACTIVE"]})
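# For illustration: `inner` is a closure, so `field` comes from the
# enclosing scope. Hypothetically, with field = 'end_date':
#   inner(["Overdue"]) -> Range(end_date={'lt': <today>})
#                         & Q('terms', state=<active loan states>)
#   inner(["Upcoming return"]) -> Range(end_date={'lte': <horizon>,
#                                                 'gte': <today>}) & ...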
def get_search_results(term, page, size):
    """Search companies by term.

    Wildcard search of companies by the provided term. Companies that have
    only one sector are ranked higher.

    Arguments:
        term {str} -- Search term to match on
        page {int} -- Page number to query
        size {int} -- Number of results per page

    Returns:
        dict -- Companies that match the term
    """
    start = (page - 1) * size
    end = start + size
    query = search.CompanyDocType.search().query(
        'function_score',
        query=Q('match', _all=term),
        functions=[SF('field_value_factor', field='has_single_sector')])
    return query[start:end].execute().to_dict()
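# A hedged usage sketch for get_search_results: the search term is made up,
# and pagination is 1-based.
def example_get_search_results_usage():
    # First page of 10 companies matching "textiles".
    return get_search_results(term='textiles', page=1, size=10)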
def search_versions(self, id_, identity, params=None, es_preference=None,
                    **kwargs):
    """Search for record's versions."""
    record = self.record_cls.pid.resolve(id_, registered_only=False)
    self.require_permission(identity, "read", record=record)

    # Prepare and execute the search
    params = params or {}
    search_result = self._search(
        'search_versions',
        identity,
        params,
        es_preference,
        record_cls=self.record_cls,
        search_opts=self.config.search_versions,
        extra_filter=Q(
            'term', **{'parent.id': str(record.parent.pid.pid_value)}),
        permission_action='read',
        **kwargs
    ).execute()

    return self.result_list(
        self,
        identity,
        search_result,
        params,
        links_tpl=LinksTemplate(
            self.config.links_search_versions,
            context={"id": id_, "args": params},
        ),
        links_item_tpl=self.links_item_tpl,
    )
def apply_query_contains(cls, queryset, options, value):
    """Apply `contains` filter.

    Syntax:

        {field_name: {contains: "{value}"}}

    Example:

        query {
          allPostDocuments(filter:{category:{contains:"tho"}}) {
            edges {
              node {
                category
                title
                content
                numViews
              }
            }
          }
        }

    :param queryset: Original queryset.
    :param options: Filter options.
    :param value: value to filter on.
    :type queryset: elasticsearch_dsl.search.Search
    :type options: dict
    :type value: str
    :return: Modified queryset.
    :rtype: elasticsearch_dsl.search.Search
    """
    return cls.apply_query(
        queryset=queryset,
        options=options,
        args=[Q("wildcard", **{options["field"]: "*{}*".format(value)})],
    )
def apply_query_endswith(cls, queryset, options, value):
    """Apply `endswith` filter.

    Syntax:

        /endpoint/?field_name__endswith={value}

    Example:

        http://localhost:8000/api/articles/?tags__endswith=dren

    :param queryset: Original queryset.
    :param options: Filter options.
    :param value: value to filter on.
    :type queryset: elasticsearch_dsl.search.Search
    :type options: dict
    :type value: str
    :return: Modified queryset.
    :rtype: elasticsearch_dsl.search.Search
    """
    return cls.apply_query(
        queryset=queryset,
        options=options,
        args=[Q('wildcard', **{options['field']: '*{}'.format(value)})])
def search_file_by_path(client, index, path):
    """Search for a file using its path.

    :param client: Elasticsearch client to search with.
    :param index: name of the index to search in.
    :param path: path of the file to look up.
    :return: the document id of the first matching file.
    """
    s = Search(using=client, index=index)
    q = Q('match', path=path)  # match query on the path field
    s = s.query(q)
    result = s.execute()
    lfiles = [r for r in result]
    if len(lfiles) == 0:
        raise NameError(f'File [{path}] not found')
    return lfiles[0].meta.id
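# A hedged usage sketch for search_file_by_path: the index name and path are
# hypothetical, and `client` is assumed to be a connected Elasticsearch
# instance.
def example_search_file_by_path_usage(client):
    return search_file_by_path(client, 'files', '/data/reports/2020.csv')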