def append_instance_permission_filter_dsl(request, search_results_object):
    """Add a per-user "no access" exclusion to the search query.

    When ``request.user.is_superuser`` is exactly ``False``, a bool query is
    added to ``search_results_object["query"]`` that ``must_not`` match the
    user's id in ``permissions.users_with_no_access``. In every other case
    the query is left untouched.
    """
    if request.user.is_superuser is not False:
        return

    denied_to_user = Terms(
        field="permissions.users_with_no_access",
        terms=[str(request.user.id)],
    )
    permission_filter = Bool()
    permission_filter.must_not(denied_to_user)
    search_results_object["query"].add_query(permission_filter)
def append_dsl(self, search_results_object, permitted_nodegroups, include_provisional):
    """Attach the nested ``geo_aggs`` aggregation to the search query.

    The aggregation is nested on ``points`` and wraps a filters aggregation
    ("inner") holding a geohash grid ("grid") and a geo bounds ("bounds")
    aggregation over ``points.point``.

    Args:
        search_results_object: mapping whose ``"query"`` entry receives the
            aggregation via ``add_aggregation``.
        permitted_nodegroups: nodegroup ids used to filter
            ``points.nodegroup_id``.
        include_provisional: ``True`` (both provisional states), ``False``
            (non-provisional only) or the string ``"only provisional"``.
            Any other value adds no provisional filter.
    """
    nested_agg = NestedAgg(path="points", name="geo_aggs")
    nested_agg_filter = FiltersAgg(name="inner")
    geo_agg_filter = Bool()

    if include_provisional is True:
        provisional_terms = ["false", "true"]
    elif include_provisional is False:
        provisional_terms = ["false"]
    # Compare strings by value: identity (`is`) only matches by accident of
    # interning and emits a SyntaxWarning on modern Python.
    elif include_provisional == "only provisional":
        provisional_terms = ["true"]
    else:
        provisional_terms = None

    if provisional_terms is not None:
        geo_agg_filter.filter(Terms(field="points.provisional", terms=provisional_terms))

    geo_agg_filter.filter(Terms(field="points.nodegroup_id", terms=permitted_nodegroups))
    nested_agg_filter.add_filter(geo_agg_filter)
    nested_agg_filter.add_aggregation(
        GeoHashGridAgg(field="points.point", name="grid", precision=settings.HEX_BIN_PRECISION)
    )
    nested_agg_filter.add_aggregation(GeoBoundsAgg(field="points.point", name="bounds"))
    nested_agg.add_aggregation(nested_agg_filter)
    search_results_object["query"].add_aggregation(nested_agg)
def get_restricted_instances(user, search_engine=None, allresources=False):
    """Return the ids of resource instances that carry a "no access" restriction.

    Args:
        user: the user whose restrictions are looked up (ignored when
            ``allresources`` is True).
        search_engine: passed straight to ``Query``; only used on the
            per-user path.
        allresources: when True, collect every object that has a
            ``no_access_to_resourceinstance`` group or user permission,
            regardless of ``user``.

    Returns:
        A list of ids. Empty for superusers when ``allresources`` is False.
    """
    if allresources is False and user.is_superuser is True:
        return []

    if allresources is True:
        codename = "no_access_to_resourceinstance"
        restricted_group_instances = {
            perm["object_pk"]
            for perm in GroupObjectPermission.objects.filter(permission__codename=codename).values("object_pk")
        }
        restricted_user_instances = {
            perm["object_pk"]
            for perm in UserObjectPermission.objects.filter(permission__codename=codename).values("object_pk")
        }
        return list(restricted_group_instances | restricted_user_instances)

    terms = Terms(field="permissions.users_with_no_access", terms=[str(user.id)])
    query = Query(search_engine, start=0, limit=settings.SEARCH_RESULT_LIMIT)
    has_access = Bool()
    has_access.must(Nested(path="permissions", query=terms))
    query.add_query(has_access)

    results = query.search(index=RESOURCES_INDEX, scroll="1m")
    scroll_id = results["_scroll_id"]
    total = results["hits"]["total"]["value"]
    hits = results["hits"]["hits"]

    if total > settings.SEARCH_RESULT_LIMIT:
        pages = total // settings.SEARCH_RESULT_LIMIT
        for _ in range(pages):
            results_scrolled = query.se.es.scroll(scroll_id=scroll_id, scroll="1m")
            # The scroll id may change between requests; always continue
            # with the most recently returned one.
            scroll_id = results_scrolled.get("_scroll_id", scroll_id)
            hits += results_scrolled["hits"]["hits"]

    return [res["_id"] for res in hits]
def delete(self, user={}, note=''):
    """
    Removes this resource together with its relationship records and its
    documents in the 'strings' and 'resource' indexes, then records a
    'delete' edit and deletes the model row.

    ``user`` and ``note`` are accepted but not used by this implementation.

    """

    se = SearchEngineFactory().create()

    relationships = self.get_related_resources(lang="en-US", start=0, limit=1000)['resource_relationships']
    for relationship in relationships:
        models.ResourceXResource.objects.get(pk=relationship['resourcexid']).delete()

    # Find every indexed term that belongs to this resource and drop it.
    term_query = Query(se)
    owned_by_resource = Bool()
    owned_by_resource.filter(Terms(field='resourceinstanceid', terms=[self.resourceinstanceid]))
    term_query.add_query(owned_by_resource)
    term_hits = term_query.search(index='strings', doc_type='term')['hits']['hits']
    for hit in term_hits:
        se.delete(index='strings', doc_type='term', id=hit['_id'])

    se.delete(index='resource', doc_type=str(self.graph_id), id=self.resourceinstanceid)

    self.save_edit(edit_type='delete')
    super(Resource, self).delete()
def find_overlapping(request):
    '''Return, as JSON, the indexed entities whose geometry intersects a
    buffered version of the geometry passed in the ``geom`` GET parameter.

    Intended to be called via Ajax when a new geometry is created in the
    Location tab, so the client can raise an alert when pre-existing
    resources fall inside the polygon (or inside the buffered zone around a
    point/line/polygon).

    The geometry is read as SRID 4326, transformed to SRID 3857 for
    buffering by ``settings.METER_RADIUS`` (1000 when that setting is
    falsy), and transformed back to 4326 before querying the 'entity' index.

    Returns a ``JSONResponse`` with a list of ``{'id', 'type',
    'primaryname'}`` dicts, one per hit.
    '''
    geom = GEOSGeometry(request.GET.get('geom', ''), srid=4326)

    # If settings.METER_RADIUS isn't set, default to 1 km.
    mindistance = settings.METER_RADIUS or 1000

    geom.transform(3857)
    buffered_geom = geom.buffer(mindistance)
    buffered_geom.transform(4326)
    # NOTE: a leftover debug `print geom, buffered_geom` statement was removed
    # here (Python 2 only syntax, wrote to stdout on every request).

    se = SearchEngineFactory().create()
    query = Query(se)
    boolfilter = Bool()
    geoshape = GeoShape(
        field='geometries.value',
        type=buffered_geom.geom_type,
        coordinates=buffered_geom.coords,
    )
    boolfilter.must(Nested(path='geometries', query=geoshape))
    query.add_filter(boolfilter)
    results = query.search(index='entity', doc_type='')

    overlaps = [
        {
            'id': hit['_id'],
            'type': hit['_type'],
            'primaryname': hit['_source']['primaryname'],
        }
        for hit in results['hits']['hits']
    ]
    return JSONResponse(overlaps)
def delete(self, *args, **kwargs):
    """Delete this tile (and its child tiles) or record a provisional delete.

    Keyword arguments consumed here (not forwarded to the model):
        request: optional request; its ``user`` decides whether the delete
            is applied directly or stored as a provisional edit.
        provisional_edit_log_details: forwarded to ``save_edit``.

    When the user is in the 'Resource Reviewer' group, owns the provisional
    data, or there is no request user at all, the tile's documents are
    removed from the 'terms' index, the edit is logged, the row is deleted
    and the parent resource is re-indexed. Otherwise a provisional 'delete'
    edit is applied and the tile is saved.
    """
    se = SearchEngineFactory().create()
    request = kwargs.pop('request', None)
    provisional_edit_log_details = kwargs.pop('provisional_edit_log_details', None)

    for tile in self.tiles:
        tile.delete(*args, request=request, **kwargs)

    try:
        user = request.user
        user_is_reviewer = request.user.groups.filter(name='Resource Reviewer').exists()
    except AttributeError:  # no user
        user = None
        # Without this assignment the check below raised UnboundLocalError
        # whenever delete() was called without a request/user. A call with
        # no user is treated as a direct (reviewer-level) delete.
        user_is_reviewer = True

    if user_is_reviewer is True or self.user_owns_provisional(user):
        query = Query(se)
        bool_query = Bool()
        bool_query.filter(Terms(field='tileid', terms=[self.tileid]))
        query.add_query(bool_query)
        results = query.search(index='terms')['hits']['hits']
        for result in results:
            se.delete(index='terms', id=result['_id'])

        self.__preDelete(request)
        # Use the resolved `user` (may be None) rather than `request.user`,
        # which raises AttributeError when there is no request.
        self.save_edit(
            user=user,
            edit_type='tile delete',
            old_value=self.data,
            provisional_edit_log_details=provisional_edit_log_details,
        )
        super(Tile, self).delete(*args, **kwargs)
        resource = Resource.objects.get(resourceinstanceid=self.resourceinstance.resourceinstanceid)
        resource.index()
    else:
        self.apply_provisional_edit(user, data={}, action='delete')
        super(Tile, self).save(*args, **kwargs)
def search_terms(request):
    """Build term/concept suggestions for the ``q`` GET parameter.

    The 'terms' and 'concepts' indexes are queried in turn with the same
    bool/should match query (lower-cased search string) and the same
    ``value_agg`` terms aggregation. For the 'terms' index, provisional
    values are filtered out unless the user is in the 'Resource Reviewer'
    group.

    Returns a ``JSONResponse`` wrapping ``{index_name: [suggestion, ...]}``.
    Suggestion ids come from one counter shared across both indexes.
    """
    lang = request.GET.get('lang', settings.LANGUAGE_CODE)
    se = SearchEngineFactory().create()
    searchString = request.GET.get('q', '')
    user_is_reviewer = request.user.groups.filter(name='Resource Reviewer').exists()

    needle = searchString.lower()
    next_id = 0
    ret = {}

    for index in ['terms', 'concepts']:
        query = Query(se, start=0, limit=0)

        boolquery = Bool()
        boolquery.should(Match(field='value', query=needle, type='phrase_prefix'))
        boolquery.should(Match(field='value.folded', query=needle, type='phrase_prefix'))
        boolquery.should(
            Match(
                field='value.folded',
                query=needle,
                fuzziness='AUTO',
                prefix_length=settings.SEARCH_TERM_SENSITIVITY,
            )
        )
        if user_is_reviewer is False and index == 'terms':
            boolquery.filter(Terms(field='provisional', terms=['false']))
        query.add_query(boolquery)

        # value_agg -> (max_score, top_concept -> conceptid, nodegroupid)
        base_agg = Aggregation(
            name='value_agg',
            type='terms',
            field='value.raw',
            size=settings.SEARCH_DROPDOWN_LENGTH,
            order={"max_score": "desc"},
        )
        nodegroupid_agg = Aggregation(name='nodegroupid', type='terms', field='nodegroupid')
        top_concept_agg = Aggregation(name='top_concept', type='terms', field='top_concept')
        conceptid_agg = Aggregation(name='conceptid', type='terms', field='conceptid')
        max_score_agg = MaxAgg(name='max_score', script='_score')

        top_concept_agg.add_aggregation(conceptid_agg)
        for sub_agg in (max_score_agg, top_concept_agg, nodegroupid_agg):
            base_agg.add_aggregation(sub_agg)
        query.add_aggregation(base_agg)

        suggestions = []
        ret[index] = suggestions
        results = query.search(index=index)

        for bucket in results['aggregations']['value_agg']['buckets']:
            top_concepts = bucket['top_concept']['buckets']

            if len(top_concepts) == 0:
                # No concept context: this is a plain term.
                suggestions.append({
                    'type': 'term',
                    'context': '',
                    'context_label': get_resource_model_label(bucket),
                    'id': next_id,
                    'text': bucket['key'],
                    'value': bucket['key']
                })
                next_id = next_id + 1
                continue

            for top_concept in top_concepts:
                top_concept_id = top_concept['key']
                top_concept_label = get_preflabel_from_conceptid(top_concept['key'], lang)['value']
                for concept in top_concept['conceptid']['buckets']:
                    suggestions.append({
                        'type': 'concept',
                        'context': top_concept_id,
                        'context_label': top_concept_label,
                        'id': next_id,
                        'text': bucket['key'],
                        'value': concept['key']
                    })
                # The counter advances once per top concept, so concepts that
                # share a top concept share an id.
                next_id = next_id + 1

    return JSONResponse(ret)
def delete(self, *args, **kwargs):
    """Delete this tile (and its child tiles) or record a provisional delete.

    Keyword arguments consumed here (not forwarded to the model):
        request: optional request; its ``user`` decides whether the delete
            is applied directly or stored as a provisional edit.
        provisional_edit_log_details: forwarded to ``save_edit``.

    With no request user the delete is treated as a reviewer-level delete.
    Direct deletes remove the tile's documents from the "terms" index, log
    the edit, delete the row and re-index the parent resource; otherwise a
    provisional "delete" edit is applied and the tile is saved.
    """
    se = SearchEngineFactory().create()
    request = kwargs.pop("request", None)
    provisional_edit_log_details = kwargs.pop("provisional_edit_log_details", None)

    for tile in self.tiles:
        tile.delete(*args, request=request, **kwargs)

    try:
        user = request.user
        user_is_reviewer = user_is_resource_reviewer(user)
    except AttributeError:  # no user
        user = None
        user_is_reviewer = True

    if user_is_reviewer is True or self.user_owns_provisional(user):
        query = Query(se)
        bool_query = Bool()
        bool_query.filter(Terms(field="tileid", terms=[self.tileid]))
        query.add_query(bool_query)
        results = query.search(index="terms")["hits"]["hits"]
        for result in results:
            se.delete(index="terms", id=result["_id"])

        self.__preDelete(request)
        # Pass the resolved `user` (None when there is no request): the
        # previous `request.user` raised AttributeError on exactly the
        # no-user path that the except branch above allows.
        self.save_edit(
            user=user,
            edit_type="tile delete",
            old_value=self.data,
            provisional_edit_log_details=provisional_edit_log_details,
        )
        super(Tile, self).delete(*args, **kwargs)
        resource = Resource.objects.get(resourceinstanceid=self.resourceinstance.resourceinstanceid)
        resource.index()
    else:
        self.apply_provisional_edit(user, data={}, action="delete")
        super(Tile, self).save(*args, **kwargs)
def append_dsl(self, search_results_object, permitted_nodegroups, include_provisional):
    """Attach the nested 'geo_aggs' aggregation to the search query.

    The aggregation is nested on 'points' and wraps a filters aggregation
    ('inner') holding a geohash grid ('grid') and a geo bounds ('bounds')
    aggregation over 'points.point'.

    Args:
        search_results_object: mapping whose 'query' entry receives the
            aggregation via ``add_aggregation``.
        permitted_nodegroups: nodegroup ids used to filter
            'points.nodegroup_id'.
        include_provisional: ``True`` (both provisional states), ``False``
            (non-provisional only) or the string 'only provisional'.
            Any other value adds no provisional filter.
    """
    nested_agg = NestedAgg(path='points', name='geo_aggs')
    nested_agg_filter = FiltersAgg(name='inner')
    geo_agg_filter = Bool()

    if include_provisional is True:
        provisional_terms = ['false', 'true']
    elif include_provisional is False:
        provisional_terms = ['false']
    # String comparison must use ==; `is` tests object identity and only
    # matched when the interpreter happened to intern both strings.
    elif include_provisional == 'only provisional':
        provisional_terms = ['true']
    else:
        provisional_terms = None

    if provisional_terms is not None:
        geo_agg_filter.filter(Terms(field='points.provisional', terms=provisional_terms))

    geo_agg_filter.filter(Terms(field='points.nodegroup_id', terms=permitted_nodegroups))
    nested_agg_filter.add_filter(geo_agg_filter)
    nested_agg_filter.add_aggregation(
        GeoHashGridAgg(field='points.point', name='grid', precision=settings.HEX_BIN_PRECISION)
    )
    nested_agg_filter.add_aggregation(GeoBoundsAgg(field='points.point', name='bounds'))
    nested_agg.add_aggregation(nested_agg_filter)
    search_results_object['query'].add_aggregation(nested_agg)
def get_preflabel_from_conceptid(conceptid, lang):
    """Look up the preferred label document for a concept.

    Searches index 'strings' / doc type 'concept' for 'prefLabel' entries
    of ``conceptid`` and picks one by language:

    1. an exact match on ``lang`` is returned immediately;
    2. otherwise the last label whose primary language subtag (text before
       the first '-') matches that of ``lang``;
    3. otherwise a label in ``settings.LANGUAGE_CODE``;
    4. otherwise the last label seen, or a dict of empty strings when the
       search returned nothing.

    Returns the chosen label's ``_source`` dict.
    """
    ret = None
    default = {
        "category": "",
        "conceptid": "",
        "language": "",
        "value": "",
        "type": "",
        "id": ""
    }
    se = SearchEngineFactory().create()
    query = Query(se)
    bool_query = Bool()
    bool_query.must(Match(field='type', query='prefLabel', type='phrase'))
    bool_query.filter(Terms(field='conceptid', terms=[conceptid]))
    query.add_query(bool_query)
    preflabels = query.search(index='strings', doc_type='concept')['hits']['hits']

    for preflabel in preflabels:
        source = preflabel['_source']
        default = source
        # get the label in the preferred language, otherwise get the label in the default language
        if source['language'] == lang:
            return source
        if source['language'].split('-')[0] == lang.split('-')[0]:
            ret = source
        if source['language'] == settings.LANGUAGE_CODE and ret is None:
            ret = source

    return default if ret is None else ret
def get_related_resources(self, lang='en-US', limit=1000, start=0):
    """
    Returns an object that lists the related resources, the relationship types, and a reference to the current resource

    The returned dict has the keys:
        'resource_instance': this resource
        'resource_relationships': ``_source`` of each relation hit, with a
            'preflabel' entry added for its relationship type
        'related_resources': ``_source`` of each related resource document
        'total': the hit total reported by the relations search

    ``limit`` and ``start`` page the relations search.

    """

    ret = {
        'resource_instance': self,
        'resource_relationships': [],
        'related_resources': []
    }
    se = SearchEngineFactory().create()
    query = Query(se, limit=limit, start=start)
    bool_filter = Bool()
    bool_filter.should(Terms(field='resourceinstanceidfrom', terms=self.resourceinstanceid))
    bool_filter.should(Terms(field='resourceinstanceidto', terms=self.resourceinstanceid))
    query.add_query(bool_filter)
    resource_relations = query.search(index='resource_relations', doc_type='all')
    ret['total'] = resource_relations['hits']['total']

    instanceids = set()
    for relation in resource_relations['hits']['hits']:
        source = relation['_source']
        source['preflabel'] = get_preflabel_from_valueid(source['relationshiptype'], lang)
        ret['resource_relationships'].append(source)
        instanceids.add(source['resourceinstanceidto'])
        instanceids.add(source['resourceinstanceidfrom'])

    if len(instanceids) > 0:
        # Drop this resource's own id so only the *other* ends are fetched.
        # discard() instead of remove(): no KeyError if the id is not in the
        # set (e.g. it was indexed in a different representation).
        instanceids.discard(str(self.resourceinstanceid))

    related_resources = se.search(index='resource', doc_type='_all', id=list(instanceids))
    if related_resources:
        for resource in related_resources['docs']:
            ret['related_resources'].append(resource['_source'])

    return ret
def get_relations(resourceinstanceid, start, limit):
    """Search 'resource_relations' for relations touching a resource.

    A relation matches when ``resourceinstanceid`` appears in either its
    'resourceinstanceidfrom' or its 'resourceinstanceidto' field. ``start``
    and ``limit`` page the search. Returns the raw search response.
    """
    relations_query = Query(se, start=start, limit=limit)

    either_endpoint = Bool()
    for endpoint_field in ('resourceinstanceidfrom', 'resourceinstanceidto'):
        either_endpoint.should(Terms(field=endpoint_field, terms=resourceinstanceid))

    relations_query.add_query(either_endpoint)
    return relations_query.search(index='resource_relations', doc_type='all')
def delete(self, user={}, note=""):
    """
    Deletes a single resource and any related indexed data

    Deletion is permitted when no user is given (``user == {}``), when the
    user is a resource reviewer, or when every tile of the resource has
    empty ``data``.

    Args:
        user: the acting user, or ``{}`` for "no user".
        note: accepted but not used; the edit log note is
            ``self.displayname``.

    Returns:
        True when the resource was deleted, False when deletion was not
        permitted.

    Raises:
        ModelInactiveError: when the resource's graph is not active.

    NOTE(review): ``se`` is not defined in this method; presumably a
    module-level search engine handle -- confirm.

    """
    import logging

    permit_deletion = False
    graph = models.GraphModel.objects.get(graphid=self.graph_id)
    if graph.isactive is False:
        message = _("This model is not yet active; unable to delete.")
        raise ModelInactiveError(message)

    if user != {}:
        user_is_reviewer = user_is_resource_reviewer(user)
        if user_is_reviewer is False:
            tiles = list(models.TileModel.objects.filter(resourceinstance=self))
            resource_is_provisional = sum(len(t.data) for t in tiles) == 0
            if resource_is_provisional is True:
                permit_deletion = True
        else:
            permit_deletion = True
    else:
        permit_deletion = True

    if permit_deletion is True:
        related_resources = self.get_related_resources(lang="en-US", start=0, limit=1000, page=0)
        for rr in related_resources["resource_relationships"]:
            # delete any related resource entries, also reindex the resource that references this resource that's being deleted
            try:
                resourceXresource = models.ResourceXResource.objects.get(pk=rr["resourcexid"])
                resource_to_reindex = (
                    resourceXresource.resourceinstanceidfrom_id
                    if resourceXresource.resourceinstanceidto_id == self.resourceinstanceid
                    else resourceXresource.resourceinstanceidto_id
                )
                resourceXresource.delete(deletedResourceId=self.resourceinstanceid)
                res = Resource.objects.get(pk=resource_to_reindex)
                res.load_tiles()
                res.index()
            except ObjectDoesNotExist:
                # The database row is gone; remove the stale index entry.
                se.delete(index=RESOURCE_RELATIONS_INDEX, id=rr["resourcexid"])

        query = Query(se)
        bool_query = Bool()
        bool_query.filter(Terms(field="resourceinstanceid", terms=[self.resourceinstanceid]))
        query.add_query(bool_query)
        results = query.search(index=TERMS_INDEX)["hits"]["hits"]
        for result in results:
            se.delete(index=TERMS_INDEX, id=result["_id"])
        se.delete(index=RESOURCES_INDEX, id=self.resourceinstanceid)

        try:
            self.save_edit(edit_type="delete", user=user, note=self.displayname)
        except Exception:
            # Logging the edit stays best-effort (a failure must not block
            # the delete), but it is no longer silent, and
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            logging.getLogger(__name__).exception(
                "Unable to record delete edit for resource %s", self.resourceinstanceid
            )
        super(Resource, self).delete()

    return permit_deletion
def search_terms(request):
    """Build term/concept suggestions for the ``q`` GET parameter.

    Queries the 'strings' index with a bool/should match query on the
    lower-cased search string and a ``value_agg`` terms aggregation.
    Provisional values are filtered out unless the user is in the
    'Resource Reviewer' group.

    Returns a ``JSONResponse`` wrapping a list of suggestion dicts
    ('type', 'context', 'context_label', 'id', 'text', 'value'). The list
    is empty when the search returns nothing.
    """
    lang = request.GET.get('lang', settings.LANGUAGE_CODE)
    se = SearchEngineFactory().create()
    searchString = request.GET.get('q', '')
    query = Query(se, start=0, limit=0)
    user_is_reviewer = request.user.groups.filter(name='Resource Reviewer').exists()

    needle = searchString.lower()
    boolquery = Bool()
    boolquery.should(Match(field='value', query=needle, type='phrase_prefix', fuzziness='AUTO'))
    boolquery.should(Match(field='value.folded', query=needle, type='phrase_prefix', fuzziness='AUTO'))
    boolquery.should(Match(field='value.folded', query=needle, fuzziness='AUTO'))
    if user_is_reviewer is False:
        boolquery.filter(Terms(field='provisional', terms=['false']))
    query.add_query(boolquery)

    base_agg = Aggregation(
        name='value_agg',
        type='terms',
        field='value.raw',
        size=settings.SEARCH_DROPDOWN_LENGTH,
        order={"max_score": "desc"},
    )
    nodegroupid_agg = Aggregation(name='nodegroupid', type='terms', field='nodegroupid')
    top_concept_agg = Aggregation(name='top_concept', type='terms', field='top_concept')
    conceptid_agg = Aggregation(name='conceptid', type='terms', field='conceptid')
    max_score_agg = MaxAgg(name='max_score', script='_score')

    top_concept_agg.add_aggregation(conceptid_agg)
    base_agg.add_aggregation(max_score_agg)
    base_agg.add_aggregation(top_concept_agg)
    base_agg.add_aggregation(nodegroupid_agg)
    query.add_aggregation(base_agg)

    # The previous fallback ({'hits': {'hits': []}}) had no 'aggregations'
    # key, so an empty search result raised KeyError below. Fall back to an
    # empty bucket list instead.
    results = query.search(index='strings') or {}
    buckets = results.get('aggregations', {}).get('value_agg', {}).get('buckets', [])

    i = 0
    ret = []
    for result in buckets:
        if len(result['top_concept']['buckets']) > 0:
            for top_concept in result['top_concept']['buckets']:
                top_concept_id = top_concept['key']
                top_concept_label = get_preflabel_from_conceptid(top_concept['key'], lang)['value']
                for concept in top_concept['conceptid']['buckets']:
                    ret.append({
                        'type': 'concept',
                        'context': top_concept_id,
                        'context_label': top_concept_label,
                        'id': i,
                        'text': result['key'],
                        'value': concept['key']
                    })
                i = i + 1
        else:
            ret.append({
                'type': 'term',
                'context': '',
                'context_label': get_resource_model_label(result),
                'id': i,
                'text': result['key'],
                'value': result['key']
            })
            i = i + 1

    return JSONResponse(ret)
def delete(self, user={}, note=''):
    """
    Removes this resource, its relationship records and its documents in
    the 'terms' and 'resources' indexes, when the caller is allowed to.

    Deletion is allowed when no user is given (``user == {}``), when the
    user belongs to the 'Resource Reviewer' group, or when all of the
    resource's tiles have empty ``data``. ``note`` is not used.

    Returns True if the resource was deleted, otherwise False.
    Raises ModelInactiveError when the resource's graph is not active.

    """

    graph = models.GraphModel.objects.get(graphid=self.graph_id)
    if graph.isactive is False:
        message = _('This model is not yet active; unable to delete.')
        raise ModelInactiveError(message)

    # Decide whether this caller may delete the resource.
    if user == {}:
        permit_deletion = True
    else:
        user_is_reviewer = user.groups.filter(name='Resource Reviewer').exists()
        if user_is_reviewer is not False:
            permit_deletion = True
        else:
            resource_tiles = list(models.TileModel.objects.filter(resourceinstance=self))
            total_data_entries = sum([len(t.data) for t in resource_tiles])
            # Non-reviewers may only delete resources whose tiles hold no data.
            permit_deletion = total_data_entries == 0

    if not permit_deletion:
        return permit_deletion

    se = SearchEngineFactory().create()
    relationships = self.get_related_resources(lang="en-US", start=0, limit=1000, page=0)
    for relationship in relationships['resource_relationships']:
        models.ResourceXResource.objects.get(pk=relationship['resourcexid']).delete()

    term_query = Query(se)
    owned_by_resource = Bool()
    owned_by_resource.filter(Terms(field='resourceinstanceid', terms=[self.resourceinstanceid]))
    term_query.add_query(owned_by_resource)
    for hit in term_query.search(index='terms')['hits']['hits']:
        se.delete(index='terms', id=hit['_id'])

    se.delete(index='resources', id=self.resourceinstanceid)
    self.save_edit(edit_type='delete', user=user, note=self.displayname)
    super(Resource, self).delete()
    return permit_deletion
def add_resourceid_filter_clause(self, graphid, user):
    """Add a clause matching the resource ids this user was granted access to.

    The ids come from ``UserXResourceInstanceAccess`` rows for ``user``
    whose resource belongs to graph ``graphid``. The clause is attached to
    ``self.paramount`` with ``should`` when ``self.existing_query`` is
    truthy, and with ``must`` otherwise.
    """
    access_rows = UserXResourceInstanceAccess.objects.filter(
        user=user,
        resource__graph_id=graphid,
    )
    granted_ids = [str(row.resource.resourceinstanceid) for row in access_rows]

    resid_clause = Bool()
    resid_clause.should(Terms(field='resourceinstanceid', terms=granted_ids))

    attach = self.paramount.should if self.existing_query else self.paramount.must
    attach(resid_clause)
def get_auto_filter(request):
    """Build the automatic term filter for ``settings.PUBLISHED_LABEL``.

    Searches index 'term' / doc type 'value' for the published label and
    keeps the concept id and context of the last hit whose term equals
    ``settings.PUBLISHED_LABEL`` and whose context preferred label equals
    ``settings.EW_STATUS_TERM``. When no hit qualifies, both stay ''.

    Returns a dict with the keys 'inverted', 'type', 'text', 'value',
    'context', 'context_label' and 'id'.
    """
    lang = request.GET.get('lang', settings.LANGUAGE_CODE)
    se1 = SearchEngineFactory().create()
    published_label = settings.PUBLISHED_LABEL

    query1 = Query(se1, start=0, limit=settings.SEARCH_DROPDOWN_LENGTH)
    boolquery1 = Bool()
    boolquery1.should(Match(field='term', query=published_label.lower(), type='phrase_prefix', fuzziness='AUTO'))
    boolquery1.should(Match(field='term.folded', query=published_label.lower(), type='phrase_prefix', fuzziness='AUTO'))
    boolquery1.should(Match(field='term.folded', query=published_label.lower(), fuzziness='AUTO'))
    query1.add_query(boolquery1)
    hits = query1.search(index='term', doc_type='value')['hits']['hits']

    matched_conceptid = ''
    matched_context = ''
    for hit in hits:
        source = hit['_source']
        pref_label = get_preflabel_from_conceptid(source['context'], lang)
        source['options']['context_label'] = pref_label['value']
        is_status_context = pref_label['value'] == settings.EW_STATUS_TERM
        if is_status_context and source['term'] == settings.PUBLISHED_LABEL:
            matched_conceptid = source['options']['conceptid']
            matched_context = source['context']

    return {
        "inverted": False,
        "type": "concept",
        "text": settings.PUBLISHED_LABEL,
        "value": matched_conceptid,
        "context": matched_context,
        "context_label": settings.EW_STATUS_TERM,
        "id": settings.PUBLISHED_LABEL + matched_conceptid,
    }
def build_search_results_dsl(request):
    """Extend the base search query with the request's temporal filters.

    The ``temporalFilter`` GET parameter is deserialized; for each entry in
    its 'filters' list a terms filter on 'date_groups.conceptid' and a
    range filter on 'date_groups.value' are added. The range filter is
    negated when the deserialized object's 'inverted' flag is truthy.

    Dates are parsed with the format '%d/%m/%Y'. An entry whose
    'date_operators__value' is not '0', '1' or '2' contributes no range
    filter.

    Returns the query produced by ``build_base_search_results_dsl`` with
    the filters attached.
    """
    temporal_filters = JSONDeserializer().deserialize(request.GET.get('temporalFilter', None))

    query = build_base_search_results_dsl(request)
    boolfilter = Bool()

    # Guard against a missing/empty temporalFilter parameter.
    if temporal_filters and 'filters' in temporal_filters:
        for temporal_filter in temporal_filters['filters']:
            terms = Terms(field='date_groups.conceptid', terms=temporal_filter['date_types__value'])
            boolfilter.must(terms)

            date_value = datetime.strptime(temporal_filter['date'], '%d/%m/%Y').isoformat()
            operator = temporal_filter['date_operators__value']

            # `date_range` (formerly `range`, which shadowed the builtin) is
            # reset on every iteration so an unknown operator can neither
            # raise UnboundLocalError nor reuse the previous filter's range.
            # NOTE(review): the original comments labelled '0' as "greater
            # than" and '2' as "less than" although they build lt/gt
            # respectively -- confirm which is intended.
            if operator == '1':  # equals query
                date_range = Range(field='date_groups.value', gte=date_value, lte=date_value)
            elif operator == '0':
                date_range = Range(field='date_groups.value', lt=date_value)
            elif operator == '2':
                date_range = Range(field='date_groups.value', gt=date_value)
            else:
                date_range = None

            if 'inverted' not in temporal_filters:
                temporal_filters['inverted'] = False

            if date_range is not None:
                if temporal_filters['inverted']:
                    boolfilter.must_not(date_range)
                else:
                    boolfilter.must(date_range)

            query.add_filter(boolfilter)

    return query
def create_nested_geo_filter(self, geometry):
    """Turn a geometry into a nested geo-shape filter on 'geometries'.

    ``geometry.geojson`` is deserialized and its "type" and "coordinates"
    feed a ``GeoShape`` on "geometries.geom.features.geometry". The shape
    is wrapped in a bool filter, which is wrapped in (and returned as) a
    ``Nested`` query.
    """
    # process GEOS geometry object into geojson and create ES filter
    geojson = JSONDeserializer().deserialize(geometry.geojson)

    shape_filter = Bool()
    shape_filter.filter(
        GeoShape(
            field="geometries.geom.features.geometry",
            type=geojson["type"],
            coordinates=geojson["coordinates"],
        )
    )
    return Nested(path='geometries', query=shape_filter)
def delete(self, *args, **kwargs):
    """Delete this tile (and its child tiles) or record a provisional delete.

    Keyword arguments consumed here (not forwarded to the model):
        request: optional request; its ``user`` decides whether the delete
            is applied directly or stored as a provisional edit.
        index: when truthy (default True) the tile's documents are removed
            from ``TERMS_INDEX`` and ``self.index()`` is called afterwards.
        transaction_id: forwarded to ``save_edit``.
        provisional_edit_log_details: forwarded to ``save_edit``.

    With no request user the delete is treated as a reviewer-level delete.
    An ``IntegrityError`` raised while deleting is logged, not re-raised.
    """
    se = SearchEngineFactory().create()
    request = kwargs.pop("request", None)
    index = kwargs.pop("index", True)
    # Previously popped "index" a second time, so transaction_id was always
    # None and a caller-supplied transaction_id leaked into **kwargs (and on
    # to the model's delete()/save()).
    transaction_id = kwargs.pop("transaction_id", None)
    provisional_edit_log_details = kwargs.pop("provisional_edit_log_details", None)

    for tile in self.tiles:
        tile.delete(*args, request=request, **kwargs)

    try:
        user = request.user
        user_is_reviewer = user_is_resource_reviewer(user)
    except AttributeError:  # no user
        user = None
        user_is_reviewer = True

    if user_is_reviewer is True or self.user_owns_provisional(user):
        if index:
            query = Query(se)
            bool_query = Bool()
            bool_query.filter(Terms(field="tileid", terms=[self.tileid]))
            query.add_query(bool_query)
            query.delete(index=TERMS_INDEX)

        self.__preDelete(request)
        self.save_edit(
            user=user,
            edit_type="tile delete",
            old_value=self.data,
            provisional_edit_log_details=provisional_edit_log_details,
            transaction_id=transaction_id,
        )
        try:
            super(Tile, self).delete(*args, **kwargs)
            # Give each node's datatype a chance to clean up after the tile.
            for nodeid in self.data.keys():
                node = models.Node.objects.get(nodeid=nodeid)
                datatype = self.datatype_factory.get_instance(node.datatype)
                datatype.post_tile_delete(self, nodeid, index=index)
            if index:
                self.index()
        except IntegrityError as e:
            logger.error(e)
    else:
        self.apply_provisional_edit(user, data={}, action="delete")
        super(Tile, self).save(*args, **kwargs)
def add_neg_filter(query):
    """Exclude resources tagged with a protection-level concept.

    The concept ids returned by
    ``get_protection_conceptids(settings.PROTECTION_LEVEL_NODE)`` are
    matched against "domains.conceptid" (nested under "domains") and the
    match is added to ``query`` as a ``must_not`` filter.

    Returns the same ``query`` object.
    """
    protected_conceptids = get_protection_conceptids(settings.PROTECTION_LEVEL_NODE)

    exclusion = Bool()
    exclusion.must_not(
        Nested(
            path="domains",
            query=Terms(field="domains.conceptid", terms=protected_conceptids),
        )
    )

    query.add_filter(exclusion)
    return query
def build_search_terms_dsl(request):
    """Build (without running) the term suggestion query for ``q``.

    The lower-cased ``q`` GET parameter is matched against 'term' and
    'term.folded' through three ``should`` clauses. The query is limited
    to ``settings.SEARCH_DROPDOWN_LENGTH`` results.
    """
    se = SearchEngineFactory().create()
    searchString = request.GET.get('q', '')
    query = Query(se, start=0, limit=settings.SEARCH_DROPDOWN_LENGTH)

    # (field, extra Match keyword arguments) for each should clause, in order.
    clause_specs = (
        ('term', {'type': 'phrase_prefix'}),
        ('term.folded', {'type': 'phrase_prefix'}),
        ('term.folded', {}),
    )
    boolquery = Bool()
    for field_name, extra in clause_specs:
        boolquery.should(Match(field=field_name, query=searchString.lower(), fuzziness='AUTO', **extra))

    query.add_query(boolquery)
    return query
def search_terms(request):
    """Build term/concept suggestions for the ``q`` GET parameter.

    Queries the 'strings' index with a bool/should match query on the
    lower-cased search string and a ``value_agg`` terms aggregation.

    Returns a ``JSONResponse`` wrapping a list of suggestion dicts
    ('type', 'context', 'context_label', 'id', 'text', 'value'). The list
    is empty when the search returns nothing.
    """
    lang = request.GET.get('lang', settings.LANGUAGE_CODE)
    se = SearchEngineFactory().create()
    searchString = request.GET.get('q', '')
    query = Query(se, start=0, limit=0)

    needle = searchString.lower()
    boolquery = Bool()
    boolquery.should(Match(field='value', query=needle, type='phrase_prefix', fuzziness='AUTO'))
    boolquery.should(Match(field='value.folded', query=needle, type='phrase_prefix', fuzziness='AUTO'))
    boolquery.should(Match(field='value.folded', query=needle, fuzziness='AUTO'))
    query.add_query(boolquery)

    base_agg = Aggregation(
        name='value_agg',
        type='terms',
        field='value.raw',
        size=settings.SEARCH_DROPDOWN_LENGTH,
        order={"max_score": "desc"},
    )
    nodegroupid_agg = Aggregation(name='nodegroupid', type='terms', field='nodegroupid')
    top_concept_agg = Aggregation(name='top_concept', type='terms', field='top_concept')
    conceptid_agg = Aggregation(name='conceptid', type='terms', field='conceptid')
    max_score_agg = MaxAgg(name='max_score', script='_score')

    top_concept_agg.add_aggregation(conceptid_agg)
    base_agg.add_aggregation(max_score_agg)
    base_agg.add_aggregation(top_concept_agg)
    base_agg.add_aggregation(nodegroupid_agg)
    query.add_aggregation(base_agg)

    # The previous fallback ({'hits': {'hits': []}}) had no 'aggregations'
    # key, so an empty search result raised KeyError below. Fall back to an
    # empty bucket list instead.
    results = query.search(index='strings') or {}
    buckets = results.get('aggregations', {}).get('value_agg', {}).get('buckets', [])

    i = 0
    ret = []
    for result in buckets:
        if len(result['top_concept']['buckets']) > 0:
            for top_concept in result['top_concept']['buckets']:
                top_concept_id = top_concept['key']
                top_concept_label = get_preflabel_from_conceptid(top_concept['key'], lang)['value']
                for concept in top_concept['conceptid']['buckets']:
                    ret.append({
                        'type': 'concept',
                        'context': top_concept_id,
                        'context_label': top_concept_label,
                        'id': i,
                        'text': result['key'],
                        'value': concept['key']
                    })
                i = i + 1
        else:
            ret.append({
                'type': 'term',
                'context': '',
                'context_label': '',
                'id': i,
                'text': result['key'],
                'value': result['key']
            })
            i = i + 1

    return JSONResponse(ret)
def append_dsl(self, search_results_object, permitted_nodegroups, include_provisional):
    """Restrict the search by the resources' provisional state.

    Nothing is added when ``include_provisional`` is ``True``. Otherwise a
    bool query is added to ``search_results_object['query']``:

    * ``False`` keeps 'provisional_resource' values 'false' and 'partial';
    * ``'only provisional'`` keeps 'true' and 'partial';
    * any other value adds the bool query without a terms filter.

    ``permitted_nodegroups`` is not used here.
    """
    search_query = Bool()

    if include_provisional is not True:
        provisional_resource_filter = Bool()

        if include_provisional is False:
            provisional_resource_filter.filter(
                Terms(field='provisional_resource', terms=['false', 'partial']))
        # Compare by value: `is` on a string literal tests identity and only
        # matched when both strings happened to be interned.
        elif include_provisional == 'only provisional':
            provisional_resource_filter.filter(
                Terms(field='provisional_resource', terms=['true', 'partial']))

        search_query.must(provisional_resource_filter)
        search_results_object['query'].add_query(search_query)
def delete(self, *args, **kwargs):
    """Delete this tile, its child tiles and its indexed terms.

    The ``request`` keyword argument (default None) is consumed here and
    passed on to each child tile's ``delete`` and to the pre-delete hook.
    After the row is deleted the parent resource is re-indexed.
    """
    se = SearchEngineFactory().create()
    request = kwargs.pop('request', None)

    # self.tiles maps to lists of child tiles; delete every child first.
    for child_tiles in self.tiles.itervalues():
        for child in child_tiles:
            child.delete(*args, request=request, **kwargs)

    term_query = Query(se)
    owned_by_tile = Bool()
    owned_by_tile.filter(Terms(field='tileid', terms=[self.tileid]))
    term_query.add_query(owned_by_tile)
    term_hits = term_query.search(index='strings', doc_type='term')['hits']['hits']
    for hit in term_hits:
        se.delete(index='strings', doc_type='term', id=hit['_id'])

    self.__preDelete(request)
    super(Tile, self).delete(*args, **kwargs)

    parent_resource = Resource.objects.get(resourceinstanceid=self.resourceinstance.resourceinstanceid)
    parent_resource.index()
def append_dsl(self, search_results_object, permitted_nodegroups, include_provisional):
    """Restrict the search by the resources' provisional state.

    Nothing is added when ``include_provisional`` is ``True``. Otherwise a
    bool query is added to ``search_results_object["query"]``:

    * ``False`` keeps "provisional_resource" values "false" and "partial";
    * ``"only provisional"`` keeps "true" and "partial";
    * any other value adds the bool query without a terms filter.

    ``permitted_nodegroups`` is not used here.
    """
    search_query = Bool()
    if include_provisional is True:
        return

    if include_provisional is False:
        allowed_states = ["false", "partial"]
    elif include_provisional == "only provisional":
        allowed_states = ["true", "partial"]
    else:
        allowed_states = None

    provisional_resource_filter = Bool()
    if allowed_states is not None:
        provisional_resource_filter.filter(Terms(field="provisional_resource", terms=allowed_states))

    search_query.must(provisional_resource_filter)
    search_results_object["query"].add_query(search_query)
def delete(self):
    """
    Removes this resource together with its relationship records (up to
    the 15 returned by the related-resources lookup) and its documents in
    the 'strings' and 'resource' indexes, then deletes the model row.

    """

    se = SearchEngineFactory().create()

    relationships = self.get_related_resources(lang="en-US", start=0, limit=15)['resource_relationships']
    for relationship in relationships:
        models.ResourceXResource.objects.get(pk=relationship['resourcexid']).delete()

    # Drop every indexed term that belongs to this resource.
    term_query = Query(se)
    owned_by_resource = Bool()
    owned_by_resource.filter(Terms(field='resourceinstanceid', terms=[self.resourceinstanceid]))
    term_query.add_query(owned_by_resource)
    for hit in term_query.search(index='strings', doc_type='term')['hits']['hits']:
        se.delete(index='strings', doc_type='term', id=hit['_id'])

    se.delete(index='resource', doc_type=str(self.graph_id), id=self.resourceinstanceid)
    super(Resource, self).delete()
def delete(self, *args, **kwargs):
    """Delete this tile, its child tiles and its indexed terms.

    The ``request`` keyword argument (default None) is consumed here and
    passed on to each child tile's ``delete`` and to the pre-delete hook.
    A 'tile delete' edit is recorded for the request's user (None when
    there is no request), the row is deleted and the parent resource is
    re-indexed.
    """
    se = SearchEngineFactory().create()
    request = kwargs.pop('request', None)

    # self.tiles maps to lists of child tiles; delete every child first.
    for tiles in self.tiles.itervalues():
        for tile in tiles:
            tile.delete(*args, request=request, **kwargs)

    query = Query(se)
    bool_query = Bool()
    bool_query.filter(Terms(field='tileid', terms=[self.tileid]))
    query.add_query(bool_query)
    results = query.search(index='strings', doc_type='term')['hits']['hits']
    for result in results:
        se.delete(index='strings', doc_type='term', id=result['_id'])

    self.__preDelete(request)
    # `request` defaults to None, so `request.user` raised AttributeError for
    # callers that delete a tile without a request; fall back to no user.
    self.save_edit(user=getattr(request, 'user', None), edit_type='tile delete', old_value=self.data)
    super(Tile, self).delete(*args, **kwargs)

    resource = Resource.objects.get(resourceinstanceid=self.resourceinstance.resourceinstanceid)
    resource.index()
def get_resource_bounds(node):
    """Return the geo bounds of the resources indexed for ``node``'s graph.

    Runs a zero-hit search on index 'resource' (doc type
    ``str(node.graph_id)``) with a geo bounds aggregation over
    'points.point'. Returns the aggregation's 'bounds' value, or None when
    the aggregation result has no 'bounds' key.
    """
    bounds_query = Query(se, start=0, limit=0)
    bounds_query.add_query(Bool())
    bounds_query.add_aggregation(GeoBoundsAgg(field='points.point', name='bounds'))

    response = bounds_query.search(index='resource', doc_type=[str(node.graph_id)])
    bounds_agg = response['aggregations']['bounds']
    if 'bounds' in bounds_agg:
        return bounds_agg['bounds']
    return None
def get_preflabel_from_conceptid(conceptid, lang):
    """Look up the preferred label document for a concept.

    Searches ``CONCEPTS_INDEX`` for "prefLabel" entries of ``conceptid``
    and picks one by language: an exact match on ``lang`` wins
    immediately; otherwise the last label sharing ``lang``'s primary
    subtag; otherwise a label in ``settings.LANGUAGE_CODE``; otherwise the
    last label seen, or a dict of empty strings when nothing was found.
    """
    fallback = {
        "category": "",
        "conceptid": "",
        "language": "",
        "value": "",
        "type": "",
        "id": ""
    }
    best = None

    label_filter = Bool()
    label_filter.must(Match(field="type", query="prefLabel", type="phrase"))
    label_filter.filter(Terms(field="conceptid", terms=[conceptid]))
    label_query = Query(se)
    label_query.add_query(label_filter)
    hits = label_query.search(index=CONCEPTS_INDEX)["hits"]["hits"]

    for hit in hits:
        label = hit["_source"]
        fallback = label
        # get the label in the preferred language, otherwise get the label in the default language
        if label["language"] == lang:
            return label
        if label["language"].split("-")[0] == lang.split("-")[0]:
            best = label
        if label["language"] == settings.LANGUAGE_CODE and best is None:
            best = label

    if best is None:
        return fallback
    return best
def get_relations(resourceinstanceid, start, limit):
    """Search "resource_relations" for relations touching a resource.

    A relation matches when ``resourceinstanceid`` appears in either its
    "resourceinstanceidfrom" or its "resourceinstanceidto" field. ``start``
    and ``limit`` page the search. Returns the raw search response.
    """
    relations_query = Query(se, start=start, limit=limit)

    either_endpoint = Bool()
    for endpoint_field in ("resourceinstanceidfrom", "resourceinstanceidto"):
        either_endpoint.should(Terms(field=endpoint_field, terms=resourceinstanceid))

    relations_query.add_query(either_endpoint)
    return relations_query.search(index="resource_relations")
def get_resource_bounds(node):
    """Return the geo bounds of the resources indexed for ``node``'s graph.

    Runs a zero-hit search on ``RESOURCES_INDEX`` restricted to
    ``graph_id == str(node.graph.graphid)`` with a geo bounds aggregation
    over "points.point". Returns the aggregation's "bounds" value, or None
    when the aggregation result has no "bounds" key.
    """
    bounds_query = Query(se, start=0, limit=0)
    bounds_query.add_query(Bool())
    bounds_query.add_aggregation(GeoBoundsAgg(field="points.point", name="bounds"))
    bounds_query.add_query(Term(field="graph_id", term=str(node.graph.graphid)))

    response = bounds_query.search(index=RESOURCES_INDEX)
    bounds_agg = response["aggregations"]["bounds"]
    if "bounds" in bounds_agg:
        return bounds_agg["bounds"]
    return None
def get_restricted_instances(user):
    """Return the ids of resources that list ``user`` as having no access.

    Only evaluated when ``user.is_superuser`` is exactly ``False``; in
    every other case an empty list is returned. The "resources" index is
    searched for documents whose "permissions.users_with_no_access"
    contains the user's id, scrolling through further pages when the total
    exceeds ``settings.SEARCH_RESULT_LIMIT``.
    """
    if user.is_superuser is not False:
        return []

    se = SearchEngineFactory().create()
    query = Query(se, start=0, limit=settings.SEARCH_RESULT_LIMIT)
    has_access = Bool()
    terms = Terms(field="permissions.users_with_no_access", terms=[str(user.id)])
    has_access.must(terms)
    query.add_query(has_access)

    results = query.search(index="resources", scroll="1m")
    scroll_id = results["_scroll_id"]
    total = results["hits"]["total"]["value"]
    hits = results["hits"]["hits"]

    if total > settings.SEARCH_RESULT_LIMIT:
        pages = total // settings.SEARCH_RESULT_LIMIT
        for _ in range(pages):
            results_scrolled = query.se.es.scroll(scroll_id=scroll_id, scroll="1m")
            # The scroll id may change between requests; always continue
            # with the most recently returned one.
            scroll_id = results_scrolled.get("_scroll_id", scroll_id)
            hits += results_scrolled["hits"]["hits"]

    return [res["_id"] for res in hits]
def create_nested_attribute_filter(self, doc_id, nodegroup_id, value_list):
    """
    Build a nested "strings" query for one nodegroup and a set of phrases.

    The inner bool query filters on ``strings.nodegroup_id`` and adds one
    phrase ``should`` clause on ``strings.string`` per entry of
    ``value_list``. ``doc_id`` is accepted but not used here.
    """
    phrase_filter = Bool()
    phrase_filter.filter(Terms(field='strings.nodegroup_id', terms=[nodegroup_id]))
    for phrase in value_list:
        phrase_clause = Match(field='strings.string', query=phrase, type='phrase')
        phrase_filter.should(phrase_clause)
    return Nested(path='strings', query=phrase_filter)
def get_search_contexts(request):
    """
    Build, or fetch from cache, the lookup of predefined search terms.

    For each entry of ``settings.SEARCH_TERMS`` the 'term' index is searched
    for the entry's text. A hit is accepted when the prefLabel of its context
    equals the entry's ``context_label`` and its term equals the entry's
    ``text``; the last accepted hit supplies the concept id and context.

    Returns a dict shaped as
    ``{context_key: {text_key: {'conceptid': ..., 'context': ...}}}``.
    Consecutive entries that share a ``context_label`` are collected into the
    same inner dict. The result is cached under 'search_contexts' for 86400
    seconds, and a cached value is returned without querying the index.

    Changes from the previous version: the Python-2-only ``<>`` operator and
    ``print`` statements were replaced by forms valid on Python 2 and 3, the
    dead initial assignment was removed, and ``value`` is pre-initialised so a
    first context label equal to the '-' sentinel no longer raises NameError.
    """
    search_context = cache.get('search_contexts')
    if search_context is not None:
        return search_context

    lang = request.GET.get('lang', settings.LANGUAGE_CODE)
    se1 = SearchEngineFactory().create()
    # Sentinel that differs from any real label, so the first term opens a group.
    context_label1 = '-'
    value = {}
    search_context = {}
    for search_term in settings.SEARCH_TERMS:
        searchString1 = search_term['text']
        print(searchString1)
        lowered = searchString1.lower()

        query1 = Query(se1, start=0, limit=settings.SEARCH_DROPDOWN_LENGTH)
        boolquery1 = Bool()
        boolquery1.should(Match(field='term', query=lowered, type='phrase_prefix', fuzziness='AUTO'))
        boolquery1.should(Match(field='term.folded', query=lowered, type='phrase_prefix', fuzziness='AUTO'))
        boolquery1.should(Match(field='term.folded', query=lowered, fuzziness='AUTO'))
        query1.add_query(boolquery1)
        results1 = query1.search(index='term', doc_type='value')

        conceptid1 = ''
        context1 = ''
        for result1 in results1['hits']['hits']:
            prefLabel = get_preflabel_from_conceptid(result1['_source']['context'], lang)
            result1['_source']['options']['context_label'] = prefLabel['value']
            if (prefLabel['value'] == search_term['context_label']
                    and result1['_source']['term'] == search_term['text']):
                conceptid1 = result1['_source']['options']['conceptid']
                context1 = result1['_source']['context']

        result = {'conceptid': conceptid1, 'context': context1}
        # A new context label starts a fresh group of text keys.
        if context_label1 != search_term['context_label']:
            value = {}
        print(result)
        value[search_term['text_key']] = result
        search_context[search_term['context_key']] = value
        context_label1 = search_term['context_label']

    cache.set('search_contexts', search_context, 86400)
    return search_context
def get_relations(resourceinstanceid, start, limit):
    """
    Search 'resource_relations' (doc type 'all') for relations of a resource.

    A relation matches when ``resourceinstanceid`` is found in either its
    'resourceinstanceidfrom' or its 'resourceinstanceidto' field. ``start``
    and ``limit`` page the query. The raw search response is returned.
    """
    relations_query = Query(se, limit=limit, start=start)

    from_or_to = Bool()
    from_clause = Terms(field='resourceinstanceidfrom', terms=resourceinstanceid)
    from_or_to.should(from_clause)
    to_clause = Terms(field='resourceinstanceidto', terms=resourceinstanceid)
    from_or_to.should(to_clause)

    relations_query.add_query(from_or_to)
    response = relations_query.search(index='resource_relations', doc_type='all')
    return response
def get_search_range_contexts(request):
    """
    Build, or fetch from cache, the lookup of predefined range search terms.

    For each entry of ``settings.RANGE_TERMS`` the 'term' index is searched
    for the entry's text. A hit is accepted when the prefLabel of its context
    equals the entry's ``context_label`` and its term equals the entry's
    ``text``; the last accepted hit supplies the concept id, the context and
    the first value id.

    Returns a dict shaped as
    ``{context_key: {text_key: {'conceptid', 'context', 'valueid'}}}``.
    Consecutive entries that share a ``context_label`` are collected into the
    same inner dict. The result is cached under 'search_range_contexts' for
    86400 seconds, and a cached value is returned without querying the index.

    Fix: ``valueid1`` is now reset for every term. It used to be assigned only
    when a hit matched, so a first term without a match raised NameError and
    later unmatched terms silently inherited the previous term's value id.
    The Python-2-only ``<>`` operator was also replaced with ``!=``.
    """
    search_range_context = cache.get('search_range_contexts')
    if search_range_context is not None:
        return search_range_context

    lang = request.GET.get('lang', request.LANGUAGE_CODE)
    se1 = SearchEngineFactory().create()
    # Sentinel that differs from any real label, so the first term opens a group.
    context_label1 = '-'
    value = {}
    search_range_context = {}
    for search_term in settings.RANGE_TERMS:
        searchString1 = search_term['text']
        lowered = searchString1.lower()

        query1 = Query(se1, start=0, limit=settings.SEARCH_DROPDOWN_LENGTH)
        boolquery1 = Bool()
        boolquery1.should(Match(field='term', query=lowered, type='phrase_prefix', fuzziness='AUTO'))
        boolquery1.should(Match(field='term.folded', query=lowered, type='phrase_prefix', fuzziness='AUTO'))
        boolquery1.should(Match(field='term.folded', query=lowered, fuzziness='AUTO'))
        query1.add_query(boolquery1)
        results1 = query1.search(index='term', doc_type='value')

        # Reset all three per term so nothing leaks from the previous term.
        conceptid1 = ''
        context1 = ''
        valueid1 = ''
        for result1 in results1['hits']['hits']:
            prefLabel = get_preflabel_from_conceptid(result1['_source']['context'], lang)
            result1['_source']['options']['context_label'] = prefLabel['value']
            if (prefLabel['value'] == search_term['context_label']
                    and result1['_source']['term'] == search_term['text']):
                conceptid1 = result1['_source']['options']['conceptid']
                context1 = result1['_source']['context']
                valueid1 = result1['_source']['ids'][0]

        result = {'conceptid': conceptid1, 'context': context1, 'valueid': valueid1}
        # A new context label starts a fresh group of text keys.
        if context_label1 != search_term['context_label']:
            value = {}
        value[search_term['text_key']] = result
        search_range_context[search_term['context_key']] = value
        context_label1 = search_term['context_label']

    cache.set('search_range_contexts', search_range_context, 86400)
    return search_range_context
def build_search_results_dsl(request):
    """
    Extend the base search query with the request's date-group filters.

    The base query comes from ``build_base_search_results_dsl(request)``. The
    "temporalFilter" GET parameter is deserialized; when it has a "filters"
    list, every entry contributes two clauses to one shared bool filter:

    * a ``must`` on "date_groups.conceptid" for the entry's date type, and
    * a range on "date_groups.value", added with ``must`` or, when the
      top-level "inverted" flag is truthy, with ``must_not``.

    Each entry's "nodes" are read by ``entitytypeid``:
    "DATE_COMPARISON_OPERATOR.E55" carries the operator, "date" carries a
    "%Y-%m-%d" date string, and any other node carries the date type.

    Operators: "1" matches exactly the date (gte and lte), "0" uses ``lt``,
    "2" uses ``gt``.

    Raises:
        ValueError: if the date string does not parse, or if the operator is
            not "0", "1" or "2". Previously an unknown operator raised
            UnboundLocalError on the first filter and silently reused the
            previous filter's range on later ones.

    Returns the query object.
    """
    temporal_filters = JSONDeserializer().deserialize(request.GET.get("temporalFilter", None))
    query = build_base_search_results_dsl(request)
    boolfilter = Bool()

    if "filters" in temporal_filters:
        for temporal_filter in temporal_filters["filters"]:
            date_type = ""
            date_string = ""
            date_operator = ""
            for node in temporal_filter["nodes"]:
                if node["entitytypeid"] == "DATE_COMPARISON_OPERATOR.E55":
                    date_operator = node["value"]
                elif node["entitytypeid"] == "date":
                    date_string = node["value"]
                else:
                    date_type = node["value"]

            boolfilter.must(Terms(field="date_groups.conceptid", terms=date_type))

            date_value = datetime.strptime(date_string, "%Y-%m-%d").isoformat()

            # NOTE(review): the original comments called "0" the "greater
            # than" query and "2" the "less than" query, although they build
            # lt and gt ranges respectively; the range arguments are kept
            # as they were.
            if date_operator == "1":
                date_range = Range(field="date_groups.value", gte=date_value, lte=date_value)
            elif date_operator == "0":
                date_range = Range(field="date_groups.value", lt=date_value)
            elif date_operator == "2":
                date_range = Range(field="date_groups.value", gt=date_value)
            else:
                raise ValueError("Unsupported date comparison operator: %r" % (date_operator,))

            if "inverted" not in temporal_filters:
                temporal_filters["inverted"] = False

            if temporal_filters["inverted"]:
                boolfilter.must_not(date_range)
            else:
                boolfilter.must(date_range)

            # NOTE(review): kept inside the loop, i.e. attached once per
            # filter entry; confirm against the upstream layout.
            query.add_filter(boolfilter)

    return query
def build_search_results_dsl(request):
    """
    Translate the search request's GET parameters into a Query object.

    Parameters read from ``request.GET``:
      termFilter     -- JSON list of terms of type 'term', 'concept' or 'string'
      spatialFilter  -- JSON object with an optional 'geometry' (and 'buffer')
      temporalFilter -- JSON object with an optional two-item 'year_min_max'
      export         -- when present, the export page size is used
      page           -- 1-based page number (empty string counts as 1)

    'term' and 'concept' terms, the spatial filter and the temporal filter go
    into a bool filter; 'string' terms go into a bool query. Each part is only
    attached to the query when it is not empty. Returns the Query.
    """
    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(request.GET.get('spatialFilter', None))
    export = request.GET.get('export', None)
    page = 1 if request.GET.get('page') == '' else int(request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(request.GET.get('temporalFilter', None))

    se = SearchEngineFactory().create()

    # Exports use their own page size.
    if export != None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit*int(page-1), limit=limit)
    boolquery = Bool()
    boolfilter = Bool()

    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            if term['type'] == 'term':
                # Exact phrase on a child entity of the entity type that is
                # linked to the term's context concept.
                entitytype = models.EntityTypes.objects.get(conceptid_id=term['context'])
                boolfilter_nested = Bool()
                boolfilter_nested.must(Terms(field='child_entities.entitytypeid', terms=[entitytype.pk]))
                boolfilter_nested.must(Match(field='child_entities.value', query=term['value'], type='phrase'))
                nested = Nested(path='child_entities', query=boolfilter_nested)
                if term['inverted']:
                    boolfilter.must_not(nested)
                else:
                    boolfilter.must(nested)
            elif term['type'] == 'concept':
                # Match any of the concept ids returned by _get_child_concepts.
                concept_ids = _get_child_concepts(term['value'])
                terms = Terms(field='domains.conceptid', terms=concept_ids)
                nested = Nested(path='domains', query=terms)
                if term['inverted']:
                    boolfilter.must_not(nested)
                else:
                    boolfilter.must(nested)
            elif term['type'] == 'string':
                # Free text: prefix match on the raw or the folded value.
                # These clauses go into the query, not the filter.
                boolfilter_folded = Bool()
                boolfilter_folded.should(Match(field='child_entities.value', query=term['value'], type='phrase_prefix'))
                boolfilter_folded.should(Match(field='child_entities.value.folded', query=term['value'], type='phrase_prefix'))
                nested = Nested(path='child_entities', query=boolfilter_folded)
                if term['inverted']:
                    boolquery.must_not(nested)
                else:
                    boolquery.must(nested)

    if 'geometry' in spatial_filter and 'type' in spatial_filter['geometry'] and spatial_filter['geometry']['type'] != '':
        geojson = spatial_filter['geometry']
        if geojson['type'] == 'bbox':
            # Reorder the four bbox numbers into the two corner points of an
            # envelope: [c0, c3] and [c2, c1].
            coordinates = [[geojson['coordinates'][0],geojson['coordinates'][3]], [geojson['coordinates'][2],geojson['coordinates'][1]]]
            geoshape = GeoShape(field='geometries.value', type='envelope', coordinates=coordinates )
            nested = Nested(path='geometries', query=geoshape)
        else:
            # Any other geometry is passed through _buffer first and the
            # buffered shape is what gets searched.
            buffer = spatial_filter['buffer']
            geojson = JSONDeserializer().deserialize(_buffer(geojson,buffer['width'],buffer['unit']).json)
            geoshape = GeoShape(field='geometries.value', type=geojson['type'], coordinates=geojson['coordinates'] )
            nested = Nested(path='geometries', query=geoshape)

        # A missing 'inverted' flag means a normal (non-inverted) search.
        if 'inverted' not in spatial_filter:
            spatial_filter['inverted'] = False

        if spatial_filter['inverted']:
            boolfilter.must_not(nested)
        else:
            boolfilter.must(nested)

    if 'year_min_max' in temporal_filter and len(temporal_filter['year_min_max']) == 2:
        # Widen the two years to Jan 1 of the first and Dec 31 of the second.
        start_date = date(temporal_filter['year_min_max'][0], 1, 1)
        end_date = date(temporal_filter['year_min_max'][1], 12, 31)
        if start_date:
            start_date = start_date.isoformat()
        if end_date:
            end_date = end_date.isoformat()
        # NOTE(review): the local name `range` shadows the builtin inside this function.
        range = Range(field='dates.value', gte=start_date, lte=end_date)
        nested = Nested(path='dates', query=range)

        if 'inverted' not in temporal_filter:
            temporal_filter['inverted'] = False

        if temporal_filter['inverted']:
            boolfilter.must_not(nested)
        else:
            boolfilter.must(nested)

    # Only attach the parts that actually received clauses.
    if not boolquery.empty:
        query.add_query(boolquery)

    if not boolfilter.empty:
        query.add_filter(boolfilter)

    return query
def gen_range_agg(gte=None, lte=None, permitted_nodegroups=None):
    """
    Build a bool query matching documents whose dates intersect [gte, lte].

    Two nested clauses are OR-ed together with ``should``: one over the
    'date_ranges' path (field 'date_ranges.date_range') and one over the
    'dates' path (field 'dates.date'). Both use an 'intersects' range. When
    ``permitted_nodegroups`` is truthy, each clause is additionally filtered
    on its own ``nodegroup_id`` field.
    """
    def _intersecting(range_field, nodegroup_field):
        clause = Bool()
        clause.filter(Range(field=range_field, gte=gte, lte=lte, relation='intersects'))
        if permitted_nodegroups:
            clause.filter(Terms(field=nodegroup_field, terms=permitted_nodegroups))
        return clause

    single_dates = _intersecting('dates.date', 'dates.nodegroup_id')
    date_spans = _intersecting('date_ranges.date_range', 'date_ranges.nodegroup_id')

    either = Bool()
    either.should(Nested(path='date_ranges', query=date_spans))
    either.should(Nested(path='dates', query=single_dates))
    return either
def build_search_results_dsl(request):
    """
    Translate the search request's GET parameters into a query and a buffer.

    Parameters read from ``request.GET``:
      termFilter     -- JSON list of terms of type 'term', 'string' or 'concept'
      mapFilter      -- JSON object with an optional 'features' list
      temporalFilter -- JSON object with 'fromDate'/'toDate' and optional
                        'dateNodeId' and 'inverted'
      advanced       -- JSON list of per-node filter dicts, each with an 'op'
      export         -- when present, the export page size is used
      page           -- 1-based page number (empty string counts as 1)

    Term, spatial and temporal clauses are restricted to the nodegroups
    returned by ``get_permitted_nodegroups(request.user)``.

    Returns ``{'query': Query, 'search_buffer': geojson-or-None}``, where
    'search_buffer' is the ``geojson`` of the buffered search geometry when a
    spatial feature was supplied.
    """
    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(request.GET.get('mapFilter', '{}'))
    export = request.GET.get('export', None)
    page = 1 if request.GET.get('page') == '' else int(request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(request.GET.get('temporalFilter', '{}'))
    advanced_filters = JSONDeserializer().deserialize(request.GET.get('advanced', '[]'))
    search_buffer = None
    se = SearchEngineFactory().create()

    # Exports use their own page size.
    if export != None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit*int(page-1), limit=limit)

    # Every search also requests a geohash grid and the overall bounds of
    # the nested 'points'.
    nested_agg = NestedAgg(path='points', name='geo_aggs')
    nested_agg.add_aggregation(GeoHashGridAgg(field='points.point', name='grid', precision=settings.HEX_BIN_PRECISION))
    nested_agg.add_aggregation(GeoBoundsAgg(field='points.point', name='bounds'))
    query.add_aggregation(nested_agg)

    search_query = Bool()
    permitted_nodegroups = get_permitted_nodegroups(request.user)

    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            # NOTE(review): term_query is created but never used below.
            term_query = Bool()
            if term['type'] == 'term' or term['type'] == 'string':
                string_filter = Bool()
                if term['type'] == 'term':
                    # Exact phrase.
                    string_filter.must(Match(field='strings.string', query=term['value'], type='phrase'))
                elif term['type'] == 'string':
                    # Prefix match on the raw or the folded string.
                    string_filter.should(Match(field='strings.string', query=term['value'], type='phrase_prefix'))
                    string_filter.should(Match(field='strings.string.folded', query=term['value'], type='phrase_prefix'))

                string_filter.filter(Terms(field='strings.nodegroup_id', terms=permitted_nodegroups))
                nested_string_filter = Nested(path='strings', query=string_filter)
                if term['inverted']:
                    search_query.must_not(nested_string_filter)
                else:
                    search_query.must(nested_string_filter)
                    # need to set min_score because the query returns results with score 0 and those have to be removed, which I don't think it should be doing
                    query.min_score('0.01')
            elif term['type'] == 'concept':
                concept_ids = _get_child_concepts(term['value'])
                conceptid_filter = Bool()
                conceptid_filter.filter(Terms(field='domains.conceptid', terms=concept_ids))
                conceptid_filter.filter(Terms(field='domains.nodegroup_id', terms=permitted_nodegroups))
                nested_conceptid_filter = Nested(path='domains', query=conceptid_filter)
                if term['inverted']:
                    search_query.must_not(nested_conceptid_filter)
                else:
                    search_query.filter(nested_conceptid_filter)

    if 'features' in spatial_filter:
        if len(spatial_filter['features']) > 0:
            # Only the first feature is used.
            feature_geom = spatial_filter['features'][0]['geometry']
            feature_properties = spatial_filter['features'][0]['properties']
            # Default buffer, overridden by the feature's own 'buffer' property.
            buffer = {'width':0,'unit':'ft'}
            if 'buffer' in feature_properties:
                buffer = feature_properties['buffer']
            search_buffer = _buffer(feature_geom, buffer['width'], buffer['unit'])
            feature_geom = JSONDeserializer().deserialize(search_buffer.json)
            geoshape = GeoShape(field='geometries.geom.features.geometry', type=feature_geom['type'], coordinates=feature_geom['coordinates'] )

            invert_spatial_search = False
            if 'inverted' in feature_properties:
                invert_spatial_search = feature_properties['inverted']

            spatial_query = Bool()
            if invert_spatial_search == True:
                spatial_query.must_not(geoshape)
            else:
                spatial_query.filter(geoshape)

            # get the nodegroup_ids that the user has permission to search
            spatial_query.filter(Terms(field='geometries.nodegroup_id', terms=permitted_nodegroups))
            search_query.filter(Nested(path='geometries', query=spatial_query))

    if 'fromDate' in temporal_filter and 'toDate' in temporal_filter:
        # NOTE(review): `now` is assigned but not used in this function.
        now = str(datetime.utcnow())
        start_date = SortableDate(temporal_filter['fromDate'])
        end_date = SortableDate(temporal_filter['toDate'])
        date_nodeid = str(temporal_filter['dateNodeId']) if 'dateNodeId' in temporal_filter and temporal_filter['dateNodeId'] != '' else None
        query_inverted = False if 'inverted' not in temporal_filter else temporal_filter['inverted']

        temporal_query = Bool()

        if query_inverted:
            # inverted date searches need to use an OR clause and are generally more complicated to structure (can't use ES must_not)
            # eg: less than START_DATE OR greater than END_DATE
            inverted_date_query = Bool()
            inverted_date_ranges_query = Bool()

            if start_date.is_valid():
                inverted_date_query.should(Range(field='dates.date', lt=start_date.as_float()))
                inverted_date_ranges_query.should(Range(field='date_ranges.date_range', lt=start_date.as_float()))
            if end_date.is_valid():
                inverted_date_query.should(Range(field='dates.date', gt=end_date.as_float()))
                inverted_date_ranges_query.should(Range(field='date_ranges.date_range', gt=end_date.as_float()))

            date_query = Bool()
            date_query.filter(inverted_date_query)
            date_query.filter(Terms(field='dates.nodegroup_id', terms=permitted_nodegroups))
            if date_nodeid:
                # A specific date node was requested: restrict to it and do
                # not add a 'date_ranges' clause at all.
                date_query.filter(Term(field='dates.nodeid', term=date_nodeid))
            else:
                date_ranges_query = Bool()
                date_ranges_query.filter(inverted_date_ranges_query)
                date_ranges_query.filter(Terms(field='date_ranges.nodegroup_id', terms=permitted_nodegroups))
                temporal_query.should(Nested(path='date_ranges', query=date_ranges_query))
            temporal_query.should(Nested(path='dates', query=date_query))

        else:
            date_query = Bool()
            date_query.filter(Range(field='dates.date', gte=start_date.as_float(), lte=end_date.as_float()))
            date_query.filter(Terms(field='dates.nodegroup_id', terms=permitted_nodegroups))
            if date_nodeid:
                # Same rule as above: a specific node excludes 'date_ranges'.
                date_query.filter(Term(field='dates.nodeid', term=date_nodeid))
            else:
                date_ranges_query = Bool()
                date_ranges_query.filter(Range(field='date_ranges.date_range', gte=start_date.as_float(), lte=end_date.as_float(), relation='intersects'))
                date_ranges_query.filter(Terms(field='date_ranges.nodegroup_id', terms=permitted_nodegroups))
                temporal_query.should(Nested(path='date_ranges', query=date_ranges_query))
            temporal_query.should(Nested(path='dates', query=date_query))

        search_query.filter(temporal_query)

    datatype_factory = DataTypeFactory()
    if len(advanced_filters) > 0:
        # Advanced filters are AND-ed inside a group; a filter whose 'op' is
        # 'or' (other than the very first one) starts a new group, and the
        # groups are OR-ed together.
        advanced_query = Bool()
        grouped_query = Bool()
        grouped_queries = [grouped_query]
        for index, advanced_filter in enumerate(advanced_filters):
            tile_query = Bool()
            for key, val in advanced_filter.iteritems():
                # Every key except 'op' is looked up as a Node primary key.
                if key != 'op':
                    node = models.Node.objects.get(pk=key)
                    # Nodes the user may not read are silently skipped.
                    if request.user.has_perm('read_nodegroup', node.nodegroup):
                        datatype = datatype_factory.get_instance(node.datatype)
                        datatype.append_search_filters(val, node, tile_query, request)
            nested_query = Nested(path='tiles', query=tile_query)
            if advanced_filter['op'] == 'or' and index != 0:
                grouped_query = Bool()
                grouped_queries.append(grouped_query)
            grouped_query.must(nested_query)
        for grouped_query in grouped_queries:
            advanced_query.should(grouped_query)
        search_query.must(advanced_query)

    query.add_query(search_query)
    # Hand back the buffer's geojson rather than the buffer object itself.
    if search_buffer != None:
        search_buffer = search_buffer.geojson
    return {'query': query, 'search_buffer':search_buffer}
def nested_query_from_pathed_values(pathed_values, stem):
    """
    Given an array of pathed values to query terms from the root, return a nested query

    pathed_values: e.g.
    [
        {
            val: '29430-4955-...'
            path: [a, b, c]
        }
    ]
    The code below reads each entry's 'path' (a list whose items carry an
    'entitytyperange' key) and 'term' (a dict with 'type', 'value',
    'inverted' and, depending on the type, 'context', 'date_operator',
    'date_value', 'start_date' and 'end_date').

    stem: the path into the index for the nested terms. This will be of the form 'nested_entity.child_entities.child_entities'

    Returns a Bool query. Entries whose path has a single element become
    leaf clauses at ``stem``; the others are grouped by the
    'entitytyperange' of their path head and handled by a recursive call one
    level deeper (``stem + '.child_entities'``).

    Note: the 'path' of every non-leaf entry is shortened in place before
    recursing, so the caller's ``pathed_values`` are modified.
    """

    # f( [[A,B,C], [A,B,D] )
    #   = Nested( AND( f( [[B,C],[B,D]] ))
    #   = Nested( AND( Nested( AND( f([[C],[D]]) )) ))
    #   = Nested( AND( Nested( AND( valueC, valueD))))

    # f( [[A,B,C], [A,B,D], [A,B,D] )
    #   = Nested( AND( f([[B,C],[B,D],[B,D]] ))
    #   = Nested( AND( Nested( AND( f([[C],[D],[D]]) )) ))
    #   = Nested( AND( Nested( AND( valueC, valueD, valueD))))

    # When the head of several paths is the same, make a single nested query and recurse on the tails

    branch_groups = {}     # those groups with a continuing tail, where we will recursively build a nested query
    leaf_groups = []       # those groups without a continuing tail, where we will use an ordinary term query

    # build the groups
    for v in pathed_values:
        path = v['path']
        if len(path) == 1:
            # this goes in its own group
            leaf_groups.append(v)
        else:
            # see if there is already a group using this head
            head = v['path'][0]['entitytyperange']
            if head not in branch_groups:
                branch_groups[head] = []
            branch_groups[head].append(v)

    # We should now have a set of groups
    # create the bool query
    bool_term = Bool()

    # add terms for any leaf groups
    for leaf_group in leaf_groups:
        if leaf_group['term']['type'] == 'concept':
            # Concept id match at this level; 'inverted' selects must_not.
            if leaf_group['term']['inverted']:
                terms = Terms(field=stem+'.conceptid', terms=leaf_group['term']['value'])
                n_terms = Nested(path=stem, query=terms)
                bool_term.must_not(n_terms)
            else:
                terms = Terms(field=stem+'.conceptid', terms=leaf_group['term']['value'])
                n_terms = Nested(path=stem, query=terms)
                bool_term.must(n_terms)
        elif leaf_group['term']['type'] == 'term':
            # Exact phrase on a value of the entity type linked to the
            # term's context concept.
            entitytype = models.EntityTypes.objects.get(conceptid_id=leaf_group['term']['context'])
            sub_bool = Bool()
            if leaf_group['term']['inverted']:
                # Inversion is applied to both inner clauses; the nested
                # query itself is still added with must below.
                sub_bool.must_not(Terms(field=stem+'.entitytypeid', terms=[entitytype.pk]))
                sub_bool.must_not(Match(field=stem+'.value', query=leaf_group['term']['value'], type='phrase'))
            else:
                sub_bool.must(Terms(field=stem+'.entitytypeid', terms=[entitytype.pk]))
                sub_bool.must(Match(field=stem+'.value', query=leaf_group['term']['value'], type='phrase'))
            nsub_bool = Nested(path=stem, query=sub_bool)
            bool_term.must(nsub_bool)
        elif leaf_group['term']['type'] == 'string':
            # Free text against the flattened child labels: fuzzy prefix
            # match on the raw or folded label, or a fuzzy match on the
            # folded label.
            boolfilter_folded = Bool()
            boolfilter_folded.should(Match(field=stem+'.flat_child_entities.label', query=leaf_group['term']['value'], type='phrase_prefix', fuzziness='AUTO'))
            boolfilter_folded.should(Match(field=stem+'.flat_child_entities.label.folded', query=leaf_group['term']['value'], type='phrase_prefix', fuzziness='AUTO'))
            boolfilter_folded.should(Match(field=stem+'.flat_child_entities.label.folded', query=leaf_group['term']['value'], fuzziness='AUTO'))
            nested = Nested(path=stem+'.flat_child_entities', query=boolfilter_folded)
            if leaf_group['term']['inverted']:
                bool_term.must_not(nested)
            else:
                bool_term.must(nested)
        elif leaf_group['term']['type'] == 'date':
            # NOTE(review): daterange stays unbound (or keeps the value from
            # a previous leaf) when date_operator is none of '0'..'3'.
            # NOTE(review): the labels on '0' and '2' are the original ones;
            # '0' builds an lt range and '2' a gt range.
            if leaf_group['term']['date_operator'] == '1': # equals query
                daterange = Range(field=stem+'.flat_child_entities.date', gte=leaf_group['term']['date_value'], lte=leaf_group['term']['date_value'])
            elif leaf_group['term']['date_operator'] == '0': # greater than query
                daterange = Range(field=stem+'.flat_child_entities.date', lt=leaf_group['term']['date_value'])
            elif leaf_group['term']['date_operator'] == '2': # less than query
                daterange = Range(field=stem+'.flat_child_entities.date', gt=leaf_group['term']['date_value'])
            elif leaf_group['term']['date_operator'] == '3': # greater than and less than query
                daterange = Range(field=stem+'.flat_child_entities.date', gte=leaf_group['term']['start_date'], lte=leaf_group['term']['end_date'])
            nested_date = Nested(path=stem+'.flat_child_entities', query=daterange)
            if leaf_group['term']['inverted']:
                bool_term.must_not(nested_date)
            else:
                bool_term.must(nested_date)

    # add terms for any branch groups
    for key in branch_groups:
        # add a nested term for each group
        branch_group = branch_groups[key]

        # remove head from each path and recurse
        for value in branch_group:
            value['path'] = value['path'][1:]
        sub_query = nested_query_from_pathed_values(branch_group, stem+'.child_entities')

        nsub_query = Nested(path=stem, query=sub_query)
        bool_term.must(nsub_query)

    return bool_term;
def prepare_documents_for_search_index(self):
    """
    Generates a list of specialized resource based documents to support resource search

    Starts from the documents produced by the parent class and adds to each:

    * 'date_groups' -- one {'conceptid', 'value'} entry per beginning-of-
      existence and end-of-existence node group;
    * 'value_<id>'  -- one float per grave/object measurement, where <id> is
      the concept id found in the 'term' index for the measurement type, or
      the measurement type's raw value when no index hit matches;
    * 'searchType'  -- for the four listed E18 entity types;
    * 'longitude' / 'latitude' -- sliced out of the first spatial geometry
      when its text contains 'POINT'.

    Returns the list of documents.
    """
    # Arches_hip
    documents = super(Resource, self).prepare_documents_for_search_index()

    for document in documents:
        document['date_groups'] = []
        for nodes in self.get_nodes('BEGINNING_OF_EXISTENCE.E63', keys=['value']):
            document['date_groups'].append({
                'conceptid': nodes['BEGINNING_OF_EXISTENCE_TYPE_E55__value'],
                'value': nodes['START_DATE_OF_EXISTENCE_E49__value']
            })

        for nodes in self.get_nodes('END_OF_EXISTENCE.E64', keys=['value']):
            document['date_groups'].append({
                'conceptid': nodes['END_OF_EXISTENCE_TYPE_E55__value'],
                'value': nodes['END_DATE_OF_EXISTENCE_E49__value']
            })

        for nodes in self.get_nodes('GRAVE_MEASUREMENT_TYPE.E55', keys=['value','label']):
            # Look up and store only the concept id (otherwise the value is different in every language)
            # NOTE(review): lang, context_label1, search_context and context1
            # are assigned in this loop but not read afterwards.
            lang = settings.LANGUAGE_CODE
            se1 = SearchEngineFactory().create()
            context_label1 = '-'
            search_context = {}
            # Search the 'term' index for the measurement type's label.
            searchString1 = nodes['GRAVE_MEASUREMENT_TYPE_E55__label']
            query1 = Query(se1, start=0, limit=settings.SEARCH_DROPDOWN_LENGTH)
            boolquery1 = Bool()
            boolquery1.should(Match(field='term', query=searchString1.lower(), type='phrase_prefix', fuzziness='AUTO'))
            boolquery1.should(Match(field='term.folded', query=searchString1.lower(), type='phrase_prefix', fuzziness='AUTO'))
            boolquery1.should(Match(field='term.folded', query=searchString1.lower(), fuzziness='AUTO'))
            query1.add_query(boolquery1)
            results1 = query1.search(index='term', doc_type='value')
            conceptid1 = ''
            context1 = ''
            # Fallback key: the raw measurement type value.
            data_type = nodes['GRAVE_MEASUREMENT_TYPE_E55__value']
            for result1 in results1['hits']['hits']:
                conceptid1 = result1['_source']['options']
                valueid1 = result1['_source']['ids'][0]
                # The hit whose first id equals the node's value supplies the concept id.
                if nodes['GRAVE_MEASUREMENT_TYPE_E55__value'] == valueid1:
                    data_type = conceptid1['conceptid']
            document['value_' + data_type] = float(nodes['VALUE_OF_MEASUREMENT_E60__value'])

        for nodes in self.get_nodes('OBJECT_MEASUREMENT_TYPE.E55', keys=['value','label']):
            # Look up and store only the concept id (otherwise the value is different in every language)
            # Same procedure as for grave measurements above, using the
            # OBJECT_MEASUREMENT_TYPE keys.
            lang = settings.LANGUAGE_CODE
            se1 = SearchEngineFactory().create()
            context_label1 = '-'
            search_context = {}
            searchString1 = nodes['OBJECT_MEASUREMENT_TYPE_E55__label']
            query1 = Query(se1, start=0, limit=settings.SEARCH_DROPDOWN_LENGTH)
            boolquery1 = Bool()
            boolquery1.should(Match(field='term', query=searchString1.lower(), type='phrase_prefix', fuzziness='AUTO'))
            boolquery1.should(Match(field='term.folded', query=searchString1.lower(), type='phrase_prefix', fuzziness='AUTO'))
            boolquery1.should(Match(field='term.folded', query=searchString1.lower(), fuzziness='AUTO'))
            query1.add_query(boolquery1)
            results1 = query1.search(index='term', doc_type='value')
            conceptid1 = ''
            context1 = ''
            data_type = nodes['OBJECT_MEASUREMENT_TYPE_E55__value']
            for result1 in results1['hits']['hits']:
                conceptid1 = result1['_source']['options']
                valueid1 = result1['_source']['ids'][0]
                if nodes['OBJECT_MEASUREMENT_TYPE_E55__value'] == valueid1:
                    data_type = conceptid1['conceptid']
            document['value_' + data_type] = float(nodes['VALUE_OF_MEASUREMENT_E60__value'])

        if self.entitytypeid == 'HERITAGE_RESOURCE.E18' or self.entitytypeid == 'SITE.E18' or self.entitytypeid == 'GRAVE.E18' or self.entitytypeid == 'OBJECT.E18':
            document['searchType'] = self.get_current_type()

        if self.get_nodes('SPATIAL_COORDINATES_GEOMETRY.E47', keys=['value']):
            # Only the first geometry is inspected.
            point = self.get_nodes('SPATIAL_COORDINATES_GEOMETRY.E47', keys=['value'])[0]['SPATIAL_COORDINATES_GEOMETRY_E47__value']
            if not isinstance(point, basestring):
                point = str(point)
            if point.find('POINT')>=0:
                # presumably the text looks like "POINT(<lon> <lat>)" -- TODO confirm.
                # lon is the slice from index 6 up to the first space found
                # at or after index 7; lat runs from there to the first ')'.
                # Both are stored as strings.
                lon = point[6:point.find(' ', 7)]
                lat = point[point.find(' ',7)+1:point.find(')')]
                document['longitude'] = lon
                document['latitude'] = lat

    return documents
def build_search_results_dsl(request):
    """
    Translate the search request's GET parameters into a Query object.

    Parameters read from ``request.GET``:
      termFilter     -- JSON list of terms of type 'term', 'concept' or 'string'
      mapFilter      -- JSON object with an optional 'features' list
      temporalFilter -- JSON object with 'fromDate'/'toDate' and optional
                        'dateNodeId' and 'inverted'
      export         -- when present, the export page size is used
      page           -- 1-based page number (empty string counts as 1)

    Besides the Elasticsearch clauses, a temporal search without a
    'dateNodeId' also runs a SQL query against public."values" to find
    concepts whose min_year/max_year range overlaps the requested years, and
    adds those concept ids as an alternative match.

    Returns the Query.
    """
    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(request.GET.get('mapFilter', '{}'))
    export = request.GET.get('export', None)
    page = 1 if request.GET.get('page') == '' else int(request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(request.GET.get('temporalFilter', '{}'))

    se = SearchEngineFactory().create()

    # Exports use their own page size.
    if export != None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit*int(page-1), limit=limit)
    # Every search also requests a geohash grid and the overall bounds of 'points'.
    query.add_aggregation(GeoHashGridAgg(field='points', name='grid', precision=settings.HEX_BIN_PRECISION))
    query.add_aggregation(GeoBoundsAgg(field='points', name='bounds'))

    search_query = Bool()

    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            if term['type'] == 'term':
                # NOTE(review): this rebinds term_filter (the raw GET string)
                # while looping over its already-deserialized content.
                term_filter = Match(field='strings', query=term['value'], type='phrase')
                if term['inverted']:
                    search_query.must_not(term_filter)
                else:
                    search_query.must(term_filter)
            elif term['type'] == 'concept':
                concept_ids = _get_child_concepts(term['value'])
                conceptid_filter = Terms(field='domains.conceptid', terms=concept_ids)
                if term['inverted']:
                    search_query.must_not(conceptid_filter)
                else:
                    search_query.must(conceptid_filter)
            elif term['type'] == 'string':
                # Prefix match on the raw or the folded strings.
                string_filter = Bool()
                string_filter.should(Match(field='strings', query=term['value'], type='phrase_prefix'))
                string_filter.should(Match(field='strings.folded', query=term['value'], type='phrase_prefix'))
                if term['inverted']:
                    search_query.must_not(string_filter)
                else:
                    search_query.must(string_filter)

    if 'features' in spatial_filter:
        if len(spatial_filter['features']) > 0:
            # Only the first feature is used.
            feature_geom = spatial_filter['features'][0]['geometry']
            feature_properties = spatial_filter['features'][0]['properties']
            # Default buffer, overridden by the feature's own 'buffer' property.
            buffer = {'width':0,'unit':'ft'}
            if 'buffer' in feature_properties:
                buffer = feature_properties['buffer']
            feature_geom = JSONDeserializer().deserialize(_buffer(feature_geom,buffer['width'],buffer['unit']).json)
            geoshape = GeoShape(field='geometries.features.geometry', type=feature_geom['type'], coordinates=feature_geom['coordinates'] )

            invert_spatial_search = False
            if 'inverted' in feature_properties:
                invert_spatial_search = feature_properties['inverted']

            if invert_spatial_search == True:
                search_query.must_not(geoshape)
            else:
                search_query.must(geoshape)

    if 'fromDate' in temporal_filter and 'toDate' in temporal_filter:
        # NOTE(review): `now` is assigned but not used in this function.
        now = str(datetime.utcnow())
        start_date = None
        end_date = None
        # 'null' is substituted into the SQL text when a year cannot be parsed.
        start_year = 'null'
        end_year = 'null'
        try:
            sd = FlexiDate.from_str(temporal_filter['fromDate'])
            # Fractional year -> integer offset from 1970, scaled by
            # 31556952 * 1000 (presumably seconds per average year times
            # 1000, i.e. epoch milliseconds -- TODO confirm).
            start_date = int((sd.as_float()-1970)*31556952*1000)

            start_year = sd.year
        except:
            # NOTE(review): bare except; any failure (including an
            # unparsable date) leaves start_date as None and start_year as 'null'.
            pass

        try:
            ed = FlexiDate.from_str(temporal_filter['toDate'])
            end_date = int((ed.as_float()-1970)*31556952*1000)

            end_year = ed.year
        except:
            # NOTE(review): bare except, same best-effort handling as above.
            pass

        # add filter for concepts that define min or max dates
        sql = None
        # NOTE(review): the SQL text is assembled with str.format; the only
        # substituted values are select_clause (built in this function) and
        # start_year/end_year (either 'null' or FlexiDate's .year).
        basesql = """ SELECT value.conceptid FROM ( SELECT {select_clause}, v.conceptid FROM public."values" v, public."values" v2 WHERE v.conceptid = v2.conceptid and v.valuetype = 'min_year' and v2.valuetype = 'max_year' ) as value WHERE overlap = true; """

        temporal_query = Bool()

        if 'inverted' not in temporal_filter:
            temporal_filter['inverted'] = False

        if temporal_filter['inverted']:
            # inverted date searches need to use an OR clause and are generally more complicated to structure (can't use ES must_not)
            # eg: less than START_DATE OR greater than END_DATE

            select_clause = []
            inverted_date_filter = Bool()

            # Search the generic 'dates' field unless a specific date node is requested.
            field = 'dates'
            if 'dateNodeId' in temporal_filter and temporal_filter['dateNodeId'] != '':
                field='tiles.data.%s' % (temporal_filter['dateNodeId'])

            if start_date is not None:
                inverted_date_filter.should(Range(field=field, lte=start_date))
                select_clause.append("(numrange(v.value::int, v2.value::int, '[]') && numrange(null,{start_year},'[]'))")
            if end_date is not None:
                inverted_date_filter.should(Range(field=field, gte=end_date))
                select_clause.append("(numrange(v.value::int, v2.value::int, '[]') && numrange({end_year},null,'[]'))")

            if 'dateNodeId' in temporal_filter and temporal_filter['dateNodeId'] != '':
                date_range_query = Nested(path='tiles', query=inverted_date_filter)
                temporal_query.should(date_range_query)
            else:
                temporal_query.should(inverted_date_filter)

                # Two-step format: the first call inserts the clause text,
                # the second fills the {start_year}/{end_year} placeholders
                # that the clause text itself contains.
                select_clause = " or ".join(select_clause) + " as overlap"
                sql = basesql.format(select_clause=select_clause).format(start_year=start_year, end_year=end_year)

        else:
            if 'dateNodeId' in temporal_filter and temporal_filter['dateNodeId'] != '':
                # NOTE(review): the local name `range` shadows the builtin inside this function.
                range = Range(field='tiles.data.%s' % (temporal_filter['dateNodeId']), gte=start_date, lte=end_date)
                date_range_query = Nested(path='tiles', query=range)
                temporal_query.should(date_range_query)
            else:
                date_range_query = Range(field='dates', gte=start_date, lte=end_date)
                temporal_query.should(date_range_query)

                select_clause = """ numrange(v.value::int, v2.value::int, '[]') && numrange({start_year},{end_year},'[]') as overlap """
                sql = basesql.format(select_clause=select_clause).format(start_year=start_year, end_year=end_year)

        # sql is only set when a dateNodeId is not specified
        if sql is not None:
            cursor = connection.cursor()
            cursor.execute(sql)
            ret = [str(row[0]) for row in cursor.fetchall()]

            # Concepts whose year range overlaps count as an alternative match.
            if len(ret) > 0:
                conceptid_filter = Terms(field='domains.conceptid', terms=ret)
                temporal_query.should(conceptid_filter)

        search_query.must(temporal_query)

    query.add_query(search_query)

    return query
def build_base_search_results_dsl(request):
    """Build the paged search query described by the request's GET parameters.

    Reads ``termFilter``, ``spatialFilter``, ``temporalFilter``, ``export``
    and ``page`` from ``request.GET``.  Term, concept, spatial and temporal
    restrictions are collected in a filter ``Bool``; free-text (``string``)
    terms are collected in a separate query ``Bool``.  Each is attached to
    the returned query only when it is not empty.

    Args:
        request: the HTTP request; ``request.GET`` and
            ``request.user.username`` are read.

    Returns:
        The ``Query`` object, paged according to ``page`` and the
        export / regular page-size setting.
    """
    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(request.GET.get('spatialFilter', None))
    export = request.GET.get('export', None)
    page = 1 if request.GET.get('page') == '' else int(request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(request.GET.get('temporalFilter', None))

    se = SearchEngineFactory().create()

    if export is not None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit*int(page-1), limit=limit)
    boolquery = Bool()
    boolfilter = Bool()

    if term_filter != '':
        # If the user is not authenticated, show only valid resources
        # (this probably still needs refining - maybe they should only see
        # their own???)
        if request.user.username == 'anonymous':
            auto_filter = list(JSONDeserializer().deserialize(term_filter))
            # Find the concept id and context for the Published status
            # NOTE(review): inferred from the original comment; confirm
            # against get_auto_filter.
            auto_filter.append(get_auto_filter(request))
            term_filter = JSONSerializer().serialize(auto_filter)

    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            if term['type'] == 'term':
                entitytype = models.EntityTypes.objects.get(conceptid_id=term['context'])
                boolfilter_nested = Bool()
                boolfilter_nested.must(Terms(field='child_entities.entitytypeid', terms=[entitytype.pk]))
                boolfilter_nested.must(Match(field='child_entities.value', query=term['value'], type='phrase'))
                nested = Nested(path='child_entities', query=boolfilter_nested)
                if term['inverted']:
                    boolfilter.must_not(nested)
                else:
                    boolfilter.must(nested)
            elif term['type'] == 'concept':
                concept_ids = _get_child_concepts(term['value'])
                terms = Terms(field='domains.conceptid', terms=concept_ids)
                nested = Nested(path='domains', query=terms)
                if term['inverted']:
                    boolfilter.must_not(nested)
                else:
                    boolfilter.must(nested)
            elif term['type'] == 'string':
                # Free-text terms go into the query Bool, not the filter Bool.
                boolfilter_folded = Bool()
                boolfilter_folded.should(Match(field='child_entities.value', query=term['value'], type='phrase_prefix'))
                boolfilter_folded.should(Match(field='child_entities.value.folded', query=term['value'], type='phrase_prefix'))
                nested = Nested(path='child_entities', query=boolfilter_folded)
                if term['inverted']:
                    boolquery.must_not(nested)
                else:
                    boolquery.must(nested)

    if 'geometry' in spatial_filter and 'type' in spatial_filter['geometry'] and spatial_filter['geometry']['type'] != '':
        geojson = spatial_filter['geometry']
        if geojson['type'] == 'bbox':
            # Reorder the four bbox values into the two corner points of an
            # envelope: [[c0, c3], [c2, c1]].
            coordinates = [[geojson['coordinates'][0], geojson['coordinates'][3]],
                           [geojson['coordinates'][2], geojson['coordinates'][1]]]
            geoshape = GeoShape(field='geometries.value', type='envelope', coordinates=coordinates)
        else:
            buffer_spec = spatial_filter['buffer']
            geojson = JSONDeserializer().deserialize(
                _buffer(geojson, buffer_spec['width'], buffer_spec['unit']).json)
            geoshape = GeoShape(field='geometries.value', type=geojson['type'], coordinates=geojson['coordinates'])
        nested = Nested(path='geometries', query=geoshape)

        # A missing 'inverted' key means a regular (non-inverted) search.
        if 'inverted' in spatial_filter and spatial_filter['inverted']:
            boolfilter.must_not(nested)
        else:
            boolfilter.must(nested)

    if 'year_min_max' in temporal_filter and len(temporal_filter['year_min_max']) == 2:
        # Expand the two years to the first and last day of the range.
        start_date = date(temporal_filter['year_min_max'][0], 1, 1).isoformat()
        end_date = date(temporal_filter['year_min_max'][1], 12, 31).isoformat()
        date_range = Range(field='dates.value', gte=start_date, lte=end_date)
        nested = Nested(path='dates', query=date_range)

        if 'inverted' in temporal_filter and temporal_filter['inverted']:
            boolfilter.must_not(nested)
        else:
            boolfilter.must(nested)

    if not boolquery.empty:
        query.add_query(boolquery)

    if not boolfilter.empty:
        query.add_filter(boolfilter)

    return query
def _combine_term_clause(target, clause, use_or, inverted):
    """Attach one search-term clause to the Bool ``target``.

    In OR mode the clause is optional (``should``) and inverted terms are
    dropped; otherwise the clause is required (``must``) or, when inverted,
    excluded (``must_not``).
    """
    if use_or:
        if not inverted:
            target.should(clause)
    elif inverted:
        target.must_not(clause)
    else:
        target.must(clause)


def _temporal_filter_inverted(temporal_entry):
    """Return True when the temporal filter entry is flagged as inverted."""
    return 'inverted' in temporal_entry and bool(temporal_entry['inverted'])


def _read_temporal_filter_nodes(temporal_filter_item):
    """Return ``(date_operator, date_value)`` for one advanced date filter.

    ``date_value`` is the ISO-formatted form of the ``%Y-%m-%d`` string
    carried by the node whose ``entitytypeid`` is ``'date'``.
    """
    date_operator = ''
    searchdate = ''
    for node in temporal_filter_item['nodes']:
        if node['entitytypeid'] == 'DATE_COMPARISON_OPERATOR.E55':
            date_operator = node['value']
        elif node['entitytypeid'] == 'date':
            searchdate = node['value']
    date_value = datetime.strptime(searchdate, '%Y-%m-%d').isoformat()
    return date_operator, date_value


def build_search_results_dsl(request):
    """Build the sorted, paged search query for the advanced search form.

    Reads ``termFilter``, ``spatialFilter``, ``temporalFilter``,
    ``booleanSearch``, ``termFilterAndOr``, ``termFilterGroup``,
    ``termFilterCombineWithPrev``, ``export`` and ``page`` from
    ``request.GET``.  Every select box of the term filter becomes one Bool
    filter; those are then merged into groups via the "combine with
    previous" flags and joined with the global boolean operator.

    Args:
        request: the HTTP request; only ``request.GET`` is read.

    Returns:
        The ``Query`` object with filters applied and a ``sort`` entry
        added to its DSL.

    Raises:
        ValueError: if a date in an advanced temporal filter is not in
            ``%Y-%m-%d`` form, or if an ungrouped temporal filter uses a
            date comparison operator other than ``'0'``, ``'1'`` or ``'2'``.
    """
    # Results are sorted ascendingly by the value of EAMENA_ID.E42 (this
    # replaced SITE_ID.E42 as the sorting criterion, update 06/09/16).
    # Must go back to this once the new Automatic Resource ID has been
    # fully developed (AZ 10/08/16).
    sorting = {
        "child_entities.label": {
            "order": "asc",
            "nested_path": "child_entities",
            "nested_filter": {
                "term": {"child_entities.entitytypeid": "EAMENA_ID.E42"}
            }
        }
    }

    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(request.GET.get('spatialFilter', None))
    export = request.GET.get('export', None)
    page = 1 if request.GET.get('page') == '' else int(request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(request.GET.get('temporalFilter', None))
    boolean_search = request.GET.get('booleanSearch', '')
    filter_and_or = JSONDeserializer().deserialize(request.GET.get('termFilterAndOr', ''))
    filter_grouping = JSONDeserializer().deserialize(request.GET.get('termFilterGroup', ''))
    filter_combine_flags = JSONDeserializer().deserialize(request.GET.get('termFilterCombineWithPrev', ''))
    # Ignore first entry as it is a dummy
    filter_combine_flags = filter_combine_flags[1:]

    se = SearchEngineFactory().create()

    if export is not None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit*int(page-1), limit=limit)
    boolquery = Bool()
    boolfilter = Bool()

    # One Bool filter per select box. These are combined further down based
    # on the global and/or and the optional groupings.
    terms_queries = []

    if term_filter != '':
        for index, select_box in enumerate(JSONDeserializer().deserialize(term_filter)):
            selectbox_boolfilter = Bool()
            groupid = filter_grouping[index]

            if groupid != 'No group':
                # Build a nested query against the nested entities by
                # tracing the path from the group root to each term.
                term_paths = []
                # Reset per select box so a path left over from a previous
                # box is never reused for this box's year-range filter.
                term_path = None
                for term in select_box:
                    if term['type'] == 'concept':
                        # get the parent concept for this value i.e. the field
                        term_parent_concept = Concept.get_parent_concept(term['value'])

                        # get the steps from the root to that concept
                        if term_parent_concept.nodetype.nodetype == "Collection":
                            term_schema = Entity.get_mapping_schema_to(term_parent_concept.legacyoid)
                        elif term_parent_concept.nodetype.nodetype == 'Concept':
                            # Concepts are arranged hierarchically: step up
                            # to the parent that is a member of the root
                            # collection.
                            parent_relations_to = models.ConceptRelations.objects.filter(
                                conceptidto=term_parent_concept.conceptid, relationtype='member')
                            grandparent = models.Concepts.objects.filter(
                                conceptid=parent_relations_to[0].conceptidfrom)
                            term_schema = Entity.get_mapping_schema_to(grandparent[0].legacyoid)

                        # this path begins at the root, and ends up at the node in question
                        term_path = term_schema['HERITAGE_RESOURCE_GROUP.E27']['steps']
                        term_paths.append({'term': term, 'path': term_path})

                    elif term['type'] == 'term':
                        concept = models.Concepts.objects.get(conceptid=term['context'])
                        term_schema = Entity.get_mapping_schema_to(concept.legacyoid)
                        term_path = term_schema['HERITAGE_RESOURCE_GROUP.E27']['steps']
                        term_paths.append({'term': term, 'path': term_path})

                    elif term['type'] == 'string':
                        term_schema = Entity.get_mapping_schema_to(groupid)
                        term_path = term_schema['HERITAGE_RESOURCE_GROUP.E27']['steps']
                        term_paths.append({'term': term, 'path': term_path})

                if 'year_min_max' in temporal_filter[index] and len(temporal_filter[index]['year_min_max']) == 2:
                    # Expand the two years to the first and last day of the range.
                    start_date = date(temporal_filter[index]['year_min_max'][0], 1, 1).isoformat()
                    end_date = date(temporal_filter[index]['year_min_max'][1], 12, 31).isoformat()

                    if term_path is None:
                        # No term in this box produced a path: fall back to
                        # the group root, as the advanced date filters do.
                        term_schema = Entity.get_mapping_schema_to(groupid)
                        term_path = term_schema['HERITAGE_RESOURCE_GROUP.E27']['steps']

                    term_paths.append({
                        'term': {
                            'date_operator': '3',
                            'start_date': start_date,
                            'end_date': end_date,
                            'type': 'date',
                            'inverted': _temporal_filter_inverted(temporal_filter[index])
                        },
                        'path': term_path
                    })

                if 'filters' in temporal_filter[index]:
                    term_schema = Entity.get_mapping_schema_to(groupid)
                    term_path = term_schema['HERITAGE_RESOURCE_GROUP.E27']['steps']

                    for temporal_filter_item in temporal_filter[index]['filters']:
                        date_operator, date_value = _read_temporal_filter_nodes(temporal_filter_item)
                        term_paths.append({
                            'term': {
                                'date_operator': date_operator,
                                'date_value': date_value,
                                'type': 'date',
                                'inverted': _temporal_filter_inverted(temporal_filter[index])
                            },
                            'path': term_path
                        })

                # combine the traced paths to build a nested query
                group_query = nested_query_from_pathed_values(term_paths, 'nested_entity.child_entities')

                # add nested query to overall query
                selectbox_boolfilter.must(group_query)

            else:
                for term in select_box:
                    if term['type'] == 'term':
                        entitytype = models.EntityTypes.objects.get(conceptid_id=term['context'])
                        boolfilter_nested = Bool()
                        boolfilter_nested.must(Terms(field='child_entities.entitytypeid', terms=[entitytype.pk]))
                        boolfilter_nested.must(Match(field='child_entities.value', query=term['value'], type='phrase'))
                        nested = Nested(path='child_entities', query=boolfilter_nested)
                        _combine_term_clause(selectbox_boolfilter, nested,
                                             filter_and_or[index] == 'or', term['inverted'])

                    elif term['type'] == 'concept':
                        concept_ids = _get_child_concepts(term['value'])
                        terms = Terms(field='domains.conceptid', terms=concept_ids)
                        nested = Nested(path='domains', query=terms)
                        _combine_term_clause(selectbox_boolfilter, nested,
                                             filter_and_or[index] == 'or', term['inverted'])

                    elif term['type'] == 'string':
                        # Holds the nested string queries on both the
                        # domains and child_entities paths.
                        boolquery2 = Bool()
                        # Searches by string in child_entities, where free
                        # text strings get indexed.
                        boolfilter_folded = Bool()
                        # Searches by string in the domains path, where
                        # controlled vocabulary concepts get indexed.
                        boolfilter_folded2 = Bool()

                        boolfilter_folded.should(Match(field='child_entities.value', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        boolfilter_folded.should(Match(field='child_entities.value.folded', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        boolfilter_folded.should(Match(field='child_entities.value.folded', query=term['value'], fuzziness='AUTO', operator='and'))
                        nested = Nested(path='child_entities', query=boolfilter_folded)

                        boolfilter_folded2.should(Match(field='domains.label', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        boolfilter_folded2.should(Match(field='domains.label.folded', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        boolfilter_folded2.should(Match(field='domains.label.folded', query=term['value'], fuzziness='AUTO', operator='and'))
                        nested2 = Nested(path='domains', query=boolfilter_folded2)

                        boolquery2.should(nested)
                        boolquery2.should(nested2)

                        # The select box filter is used here instead of
                        # boolquery because boolquery can't be combined with
                        # other boolfilters using boolean OR.
                        _combine_term_clause(selectbox_boolfilter, boolquery2,
                                             filter_and_or[index] == 'or', term['inverted'])

                if 'year_min_max' in temporal_filter[index] and len(temporal_filter[index]['year_min_max']) == 2:
                    start_date = date(temporal_filter[index]['year_min_max'][0], 1, 1).isoformat()
                    end_date = date(temporal_filter[index]['year_min_max'][1], 12, 31).isoformat()
                    date_range = Range(field='dates.value', gte=start_date, lte=end_date)
                    nested = Nested(path='dates', query=date_range)

                    if _temporal_filter_inverted(temporal_filter[index]):
                        selectbox_boolfilter.must_not(nested)
                    else:
                        selectbox_boolfilter.must(nested)

                if 'filters' in temporal_filter[index]:
                    for temporal_filter_item in temporal_filter[index]['filters']:
                        date_operator, date_value = _read_temporal_filter_nodes(temporal_filter_item)

                        # NOTE(review): the original comments labelled '0'
                        # as "greater than" and '2' as "less than", yet '0'
                        # maps to lt and '2' to gt. The code is kept as-is;
                        # confirm the operator codes against the UI.
                        if date_operator == '1':  # equals query
                            date_range = Range(field='dates.value', gte=date_value, lte=date_value)
                        elif date_operator == '0':
                            date_range = Range(field='dates.value', lt=date_value)
                        elif date_operator == '2':
                            date_range = Range(field='dates.value', gt=date_value)
                        else:
                            # Refuse to silently reuse the range built for a
                            # previous filter.
                            raise ValueError(
                                'Unsupported date comparison operator: %r' % (date_operator,))

                        nested = Nested(path='dates', query=date_range)

                        if _temporal_filter_inverted(temporal_filter[index]):
                            selectbox_boolfilter.must_not(nested)
                        else:
                            selectbox_boolfilter.must(nested)

            terms_queries.append(selectbox_boolfilter)

    # We now have individual query terms for each of the search components.
    # Build an array of groups which will be combined according to the
    # global And/Or; queries within one group are combined by the complement
    # of the global And/Or.  [[A, B], [C], [D, E]] translates to either
    #   (A || B) && C && (D || E)    for global AND, or
    #   (A && B) || C || (D && E)    for global OR.
    bool_components = []
    for i, term_query in enumerate(terms_queries):
        if i == 0:
            bool_components.append([term_query])
        else:
            should_group_with_previous = filter_combine_flags[i-1]
            if should_group_with_previous:
                bool_components[-1].append(term_query)
            else:
                bool_components.append([term_query])

    # Now build the ES queries
    for bool_component in bool_components:
        if len(bool_component) == 1:
            # just combine this on its own
            q = bool_component[0]
        else:
            q = Bool()
            for sub_component in bool_component:
                # apply the OPPOSITE of the global boolean operator
                if boolean_search == 'or':
                    q.must(sub_component)
                else:
                    q.should(sub_component)

        # combine to the overall query according to the global boolean operator
        if boolean_search == 'or':
            boolfilter.should(q)
        else:
            boolfilter.must(q)

    if 'geometry' in spatial_filter and 'type' in spatial_filter['geometry'] and spatial_filter['geometry']['type'] != '':
        geojson = spatial_filter['geometry']
        if geojson['type'] == 'bbox':
            # Reorder the four bbox values into the two corner points of an
            # envelope: [[c0, c3], [c2, c1]].
            coordinates = [[geojson['coordinates'][0], geojson['coordinates'][3]],
                           [geojson['coordinates'][2], geojson['coordinates'][1]]]
            geoshape = GeoShape(field='geometries.value', type='envelope', coordinates=coordinates)
        else:
            buffer_spec = spatial_filter['buffer']
            geojson = JSONDeserializer().deserialize(
                _buffer(geojson, buffer_spec['width'], buffer_spec['unit']).json)
            geoshape = GeoShape(field='geometries.value', type=geojson['type'], coordinates=geojson['coordinates'])
        nested = Nested(path='geometries', query=geoshape)

        # A missing 'inverted' key means a regular (non-inverted) search.
        if 'inverted' in spatial_filter and spatial_filter['inverted']:
            boolfilter.must_not(nested)
        else:
            boolfilter.must(nested)

    if not boolquery.empty:
        query.add_query(boolquery)

    if not boolfilter.empty:
        query.add_filter(boolfilter)

    # Sorting criterion added to query (AZ 10/08/16)
    query.dsl.update({'sort': sorting})

    return query