def append_dsl(self, search_results_object, permitted_nodegroups, include_provisional):
    """
    Append a nested geo aggregation (geohash grid + geo bounds) to the search
    query, filtered by provisional visibility and by the nodegroups the
    requesting user is permitted to read.

    Args:
        search_results_object: dict holding the 'query' being assembled.
        permitted_nodegroups: iterable of nodegroup ids the user may search.
        include_provisional: True (all), False (authoritative only), or the
            string 'only provisional'.
    """
    nested_agg = NestedAgg(path='points', name='geo_aggs')
    nested_agg_filter = FiltersAgg(name='inner')
    geo_agg_filter = Bool()

    if include_provisional is True:
        # both authoritative and provisional points are visible
        geo_agg_filter.filter(
            Terms(field='points.provisional', terms=['false', 'true']))
    elif include_provisional is False:
        geo_agg_filter.filter(
            Terms(field='points.provisional', terms=['false']))
    # BUG FIX: original used `is 'only provisional'`, an identity comparison
    # with a string literal that is not guaranteed to match (and raises a
    # SyntaxWarning on Python 3.8+); use equality instead.
    elif include_provisional == 'only provisional':
        geo_agg_filter.filter(
            Terms(field='points.provisional', terms=['true']))

    # restrict the aggregation to nodegroups the user can read
    geo_agg_filter.filter(
        Terms(field='points.nodegroup_id', terms=permitted_nodegroups))
    nested_agg_filter.add_filter(geo_agg_filter)
    nested_agg_filter.add_aggregation(
        GeoHashGridAgg(field='points.point', name='grid',
                       precision=settings.HEX_BIN_PRECISION))
    nested_agg_filter.add_aggregation(
        GeoBoundsAgg(field='points.point', name='bounds'))
    nested_agg.add_aggregation(nested_agg_filter)
    search_results_object['query'].add_aggregation(nested_agg)
def append_dsl(self, search_results_object, permitted_nodegroups, include_provisional):
    """
    Append a nested geo aggregation (geohash grid + geo bounds) to the search
    query, filtered by provisional visibility and by the nodegroups the
    requesting user is permitted to read.

    Args:
        search_results_object: dict holding the "query" being assembled.
        permitted_nodegroups: iterable of nodegroup ids the user may search.
        include_provisional: True (all), False (authoritative only), or the
            string "only provisional".
    """
    nested_agg = NestedAgg(path="points", name="geo_aggs")
    nested_agg_filter = FiltersAgg(name="inner")
    geo_agg_filter = Bool()

    if include_provisional is True:
        # both authoritative and provisional points are visible
        geo_agg_filter.filter(
            Terms(field="points.provisional", terms=["false", "true"]))
    elif include_provisional is False:
        geo_agg_filter.filter(
            Terms(field="points.provisional", terms=["false"]))
    # BUG FIX: original used `is "only provisional"`, an identity comparison
    # with a string literal that is not guaranteed to match (and raises a
    # SyntaxWarning on Python 3.8+); use equality instead.
    elif include_provisional == "only provisional":
        geo_agg_filter.filter(
            Terms(field="points.provisional", terms=["true"]))

    # restrict the aggregation to nodegroups the user can read
    geo_agg_filter.filter(
        Terms(field="points.nodegroup_id", terms=permitted_nodegroups))
    nested_agg_filter.add_filter(geo_agg_filter)
    nested_agg_filter.add_aggregation(
        GeoHashGridAgg(field="points.point", name="grid",
                       precision=settings.HEX_BIN_PRECISION))
    nested_agg_filter.add_aggregation(
        GeoBoundsAgg(field="points.point", name="bounds"))
    nested_agg.add_aggregation(nested_agg_filter)
    search_results_object["query"].add_aggregation(nested_agg)
def get_resource_bounds(node):
    """
    Run a zero-hit search against the resource index for the given node's
    graph and return the geo bounds aggregation, or None when no bounds
    were computed (e.g. no geometries indexed).
    """
    # limit=0: we only want the aggregation, not any hits
    bounds_query = Query(se, start=0, limit=0)
    bounds_query.add_query(Bool())
    bounds_query.add_aggregation(
        GeoBoundsAgg(field='points.point', name='bounds'))
    results = bounds_query.search(index='resource',
                                  doc_type=[str(node.graph_id)])
    bounds_agg = results['aggregations']['bounds']
    if 'bounds' in bounds_agg:
        return bounds_agg['bounds']
    return None
def get_resource_bounds(node):
    """
    Run a zero-hit search over the resources index, restricted to the node's
    graph, and return the geo bounds aggregation, or None when no bounds
    were computed (e.g. no geometries indexed).
    """
    # limit=0: we only want the aggregation, not any hits
    bounds_query = Query(se, start=0, limit=0)
    bounds_query.add_query(Bool())
    bounds_query.add_aggregation(
        GeoBoundsAgg(field="points.point", name="bounds"))
    bounds_query.add_query(
        Term(field="graph_id", term=str(node.graph.graphid)))
    results = bounds_query.search(index=RESOURCES_INDEX)
    bounds_agg = results["aggregations"]["bounds"]
    return bounds_agg["bounds"] if "bounds" in bounds_agg else None
def build_search_results_dsl(request):
    """
    Build the Elasticsearch DSL for a resource search from the request's GET
    parameters: term/concept filters, a spatial (map) filter, a temporal
    filter, and advanced tile filters — all restricted to nodegroups the
    requesting user is permitted to read and to the requested provisional
    visibility.

    Returns:
        dict with keys 'query' (the assembled Query) and 'search_buffer'
        (GeoJSON of the buffered spatial filter, or None when no spatial
        filter was applied).
    """
    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(
        request.GET.get('mapFilter', '{}'))
    include_provisional = get_provisional_type(request)
    export = request.GET.get('export', None)
    mobile_download = request.GET.get('mobiledownload', None)
    page = 1 if request.GET.get('page') == '' else int(
        request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(
        request.GET.get('temporalFilter', '{}'))
    advanced_filters = JSONDeserializer().deserialize(
        request.GET.get('advanced', '[]'))
    search_buffer = None
    se = SearchEngineFactory().create()

    # exports and mobile downloads page through larger result sets
    if export is not None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    elif mobile_download is not None:
        limit = settings.MOBILE_DOWNLOAD_RESOURCE_LIMIT
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit * int(page - 1), limit=limit)
    search_query = Bool()

    # geo aggregation (hex-bin grid + bounds) over the 'points' nested docs,
    # filtered by the requested provisional visibility
    nested_agg = NestedAgg(path='points', name='geo_aggs')
    nested_agg_filter = FiltersAgg(name='inner')
    if include_provisional == True:
        nested_agg_filter.add_filter(
            Terms(field='points.provisional', terms=['false', 'true']))
    else:
        provisional_resource_filter = Bool()
        if include_provisional == False:
            provisional_resource_filter.filter(
                Terms(field='provisional', terms=['false', 'partial']))
            nested_agg_filter.add_filter(
                Terms(field='points.provisional', terms=['false']))
        elif include_provisional == 'only provisional':
            provisional_resource_filter.filter(
                Terms(field='provisional', terms=['true', 'partial']))
            nested_agg_filter.add_filter(
                Terms(field='points.provisional', terms=['true']))
        search_query.must(provisional_resource_filter)
    nested_agg_filter.add_aggregation(
        GeoHashGridAgg(field='points.point', name='grid',
                       precision=settings.HEX_BIN_PRECISION))
    nested_agg_filter.add_aggregation(
        GeoBoundsAgg(field='points.point', name='bounds'))
    nested_agg.add_aggregation(nested_agg_filter)
    query.add_aggregation(nested_agg)

    permitted_nodegroups = get_permitted_nodegroups(request.user)

    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            if term['type'] == 'term' or term['type'] == 'string':
                string_filter = Bool()
                if term['type'] == 'term':
                    # exact phrase match
                    string_filter.must(
                        Match(field='strings.string', query=term['value'],
                              type='phrase'))
                elif term['type'] == 'string':
                    # prefix match against raw and case-folded strings
                    string_filter.should(
                        Match(field='strings.string', query=term['value'],
                              type='phrase_prefix'))
                    string_filter.should(
                        Match(field='strings.string.folded',
                              query=term['value'], type='phrase_prefix'))
                if include_provisional == False:
                    string_filter.must_not(
                        Match(field='strings.provisional', query='true',
                              type='phrase'))
                elif include_provisional == 'only provisional':
                    string_filter.must_not(
                        Match(field='strings.provisional', query='false',
                              type='phrase'))
                string_filter.filter(
                    Terms(field='strings.nodegroup_id',
                          terms=permitted_nodegroups))
                nested_string_filter = Nested(path='strings',
                                              query=string_filter)
                if term['inverted']:
                    search_query.must_not(nested_string_filter)
                else:
                    search_query.must(nested_string_filter)
                # need to set min_score because the query returns results
                # with score 0 and those have to be removed, which I don't
                # think it should be doing
                query.min_score('0.01')
            elif term['type'] == 'concept':
                concept_ids = _get_child_concepts(term['value'])
                conceptid_filter = Bool()
                conceptid_filter.filter(
                    Terms(field='domains.conceptid', terms=concept_ids))
                conceptid_filter.filter(
                    Terms(field='domains.nodegroup_id',
                          terms=permitted_nodegroups))
                if include_provisional == False:
                    conceptid_filter.must_not(
                        Match(field='domains.provisional', query='true',
                              type='phrase'))
                elif include_provisional == 'only provisional':
                    conceptid_filter.must_not(
                        Match(field='domains.provisional', query='false',
                              type='phrase'))
                nested_conceptid_filter = Nested(path='domains',
                                                 query=conceptid_filter)
                if term['inverted']:
                    search_query.must_not(nested_conceptid_filter)
                else:
                    search_query.filter(nested_conceptid_filter)

    if 'features' in spatial_filter:
        if len(spatial_filter['features']) > 0:
            # only the first feature of the filter is used
            feature_geom = spatial_filter['features'][0]['geometry']
            feature_properties = {}
            if 'properties' in spatial_filter['features'][0]:
                feature_properties = spatial_filter['features'][0]['properties']
            buffer = {'width': 0, 'unit': 'ft'}
            if 'buffer' in feature_properties:
                buffer = feature_properties['buffer']
            search_buffer = _buffer(feature_geom, buffer['width'],
                                    buffer['unit'])
            feature_geom = JSONDeserializer().deserialize(search_buffer.json)
            geoshape = GeoShape(field='geometries.geom.features.geometry',
                                type=feature_geom['type'],
                                coordinates=feature_geom['coordinates'])
            invert_spatial_search = False
            if 'inverted' in feature_properties:
                invert_spatial_search = feature_properties['inverted']
            spatial_query = Bool()
            if invert_spatial_search == True:
                spatial_query.must_not(geoshape)
            else:
                spatial_query.filter(geoshape)
            # get the nodegroup_ids that the user has permission to search
            spatial_query.filter(
                Terms(field='geometries.nodegroup_id',
                      terms=permitted_nodegroups))
            if include_provisional == False:
                spatial_query.filter(
                    Terms(field='geometries.provisional', terms=['false']))
            elif include_provisional == 'only provisional':
                spatial_query.filter(
                    Terms(field='geometries.provisional', terms=['true']))
            search_query.filter(Nested(path='geometries',
                                       query=spatial_query))

    if 'fromDate' in temporal_filter and 'toDate' in temporal_filter:
        start_date = ExtendedDateFormat(temporal_filter['fromDate'])
        end_date = ExtendedDateFormat(temporal_filter['toDate'])
        date_nodeid = str(
            temporal_filter['dateNodeId']
        ) if 'dateNodeId' in temporal_filter and temporal_filter[
            'dateNodeId'] != '' else None
        query_inverted = False if 'inverted' not in temporal_filter else temporal_filter['inverted']
        temporal_query = Bool()
        if query_inverted:
            # inverted date searches need to use an OR clause and are
            # generally more complicated to structure (can't use ES must_not)
            # eg: less than START_DATE OR greater than END_DATE
            inverted_date_query = Bool()
            inverted_date_ranges_query = Bool()
            if start_date.is_valid():
                inverted_date_query.should(
                    Range(field='dates.date', lt=start_date.lower))
                inverted_date_ranges_query.should(
                    Range(field='date_ranges.date_range',
                          lt=start_date.lower))
            if end_date.is_valid():
                inverted_date_query.should(
                    Range(field='dates.date', gt=end_date.upper))
                inverted_date_ranges_query.should(
                    Range(field='date_ranges.date_range', gt=end_date.upper))
            date_query = Bool()
            date_query.filter(inverted_date_query)
            date_query.filter(
                Terms(field='dates.nodegroup_id', terms=permitted_nodegroups))
            if include_provisional == False:
                date_query.filter(
                    Terms(field='dates.provisional', terms=['false']))
            elif include_provisional == 'only provisional':
                date_query.filter(
                    Terms(field='dates.provisional', terms=['true']))
            if date_nodeid:
                date_query.filter(Term(field='dates.nodeid',
                                       term=date_nodeid))
            else:
                date_ranges_query = Bool()
                date_ranges_query.filter(inverted_date_ranges_query)
                date_ranges_query.filter(
                    Terms(field='date_ranges.nodegroup_id',
                          terms=permitted_nodegroups))
                if include_provisional == False:
                    date_ranges_query.filter(
                        Terms(field='date_ranges.provisional',
                              terms=['false']))
                elif include_provisional == 'only provisional':
                    date_ranges_query.filter(
                        Terms(field='date_ranges.provisional',
                              terms=['true']))
                temporal_query.should(
                    Nested(path='date_ranges', query=date_ranges_query))
            temporal_query.should(Nested(path='dates', query=date_query))
        else:
            date_query = Bool()
            date_query.filter(
                Range(field='dates.date', gte=start_date.lower,
                      lte=end_date.upper))
            date_query.filter(
                Terms(field='dates.nodegroup_id', terms=permitted_nodegroups))
            if include_provisional == False:
                date_query.filter(
                    Terms(field='dates.provisional', terms=['false']))
            elif include_provisional == 'only provisional':
                date_query.filter(
                    Terms(field='dates.provisional', terms=['true']))
            if date_nodeid:
                date_query.filter(Term(field='dates.nodeid',
                                       term=date_nodeid))
            else:
                date_ranges_query = Bool()
                date_ranges_query.filter(
                    Range(field='date_ranges.date_range',
                          gte=start_date.lower, lte=end_date.upper,
                          relation='intersects'))
                date_ranges_query.filter(
                    Terms(field='date_ranges.nodegroup_id',
                          terms=permitted_nodegroups))
                if include_provisional == False:
                    date_ranges_query.filter(
                        Terms(field='date_ranges.provisional',
                              terms=['false']))
                if include_provisional == 'only provisional':
                    date_ranges_query.filter(
                        Terms(field='date_ranges.provisional',
                              terms=['true']))
                temporal_query.should(
                    Nested(path='date_ranges', query=date_ranges_query))
            temporal_query.should(Nested(path='dates', query=date_query))
        search_query.filter(temporal_query)

    datatype_factory = DataTypeFactory()
    if len(advanced_filters) > 0:
        advanced_query = Bool()
        grouped_query = Bool()
        grouped_queries = [grouped_query]
        for index, advanced_filter in enumerate(advanced_filters):
            tile_query = Bool()
            # BUG FIX: dict.iteritems() does not exist in Python 3 and would
            # raise AttributeError; items() is the correct call.
            for key, val in advanced_filter.items():
                if key != 'op':
                    node = models.Node.objects.get(pk=key)
                    if request.user.has_perm('read_nodegroup',
                                             node.nodegroup):
                        datatype = datatype_factory.get_instance(node.datatype)
                        datatype.append_search_filters(val, node, tile_query,
                                                       request)
            nested_query = Nested(path='tiles', query=tile_query)
            # an 'or' filter starts a new group; groups are OR'ed together
            # below via should()
            if advanced_filter['op'] == 'or' and index != 0:
                grouped_query = Bool()
                grouped_queries.append(grouped_query)
            grouped_query.must(nested_query)
        for grouped_query in grouped_queries:
            advanced_query.should(grouped_query)
        search_query.must(advanced_query)

    query.add_query(search_query)
    if search_buffer is not None:
        search_buffer = search_buffer.geojson
    return {'query': query, 'search_buffer': search_buffer}
def build_search_results_dsl(request):
    """
    Build the Elasticsearch DSL for a resource search from the request's GET
    parameters: term/concept/string filters plus optional spatial and
    temporal filters.

    Returns:
        The assembled Query object (not yet executed).
    """
    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(
        request.GET.get('mapFilter', '{}'))
    export = request.GET.get('export', None)
    page = 1 if request.GET.get('page') == '' else int(
        request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(
        request.GET.get('temporalFilter', '{}'))
    se = SearchEngineFactory().create()

    # exports page through a larger result set
    if export is not None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit * int(page - 1), limit=limit)
    query.add_aggregation(
        GeoHashGridAgg(field='points', name='grid',
                       precision=settings.HEX_BIN_PRECISION))
    query.add_aggregation(GeoBoundsAgg(field='points', name='bounds'))
    search_query = Bool()

    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            if term['type'] == 'term':
                # exact phrase match against the indexed strings
                match_filter = Match(field='strings', query=term['value'],
                                     type='phrase')
                if term['inverted']:
                    search_query.must_not(match_filter)
                else:
                    search_query.must(match_filter)
            elif term['type'] == 'concept':
                concept_ids = _get_child_concepts(term['value'])
                conceptid_filter = Terms(field='domains.conceptid',
                                         terms=concept_ids)
                if term['inverted']:
                    search_query.must_not(conceptid_filter)
                else:
                    search_query.must(conceptid_filter)
            elif term['type'] == 'string':
                # prefix match against raw and case-folded strings
                string_filter = Bool()
                string_filter.should(
                    Match(field='strings', query=term['value'],
                          type='phrase_prefix'))
                string_filter.should(
                    Match(field='strings.folded', query=term['value'],
                          type='phrase_prefix'))
                if term['inverted']:
                    search_query.must_not(string_filter)
                else:
                    search_query.must(string_filter)

    if 'features' in spatial_filter:
        if len(spatial_filter['features']) > 0:
            # only the first feature of the filter is used
            feature_geom = spatial_filter['features'][0]['geometry']
            feature_properties = spatial_filter['features'][0]['properties']
            buffer = {'width': 0, 'unit': 'ft'}
            if 'buffer' in feature_properties:
                buffer = feature_properties['buffer']
            feature_geom = JSONDeserializer().deserialize(
                _buffer(feature_geom, buffer['width'], buffer['unit']).json)
            geoshape = GeoShape(field='geometries.features.geometry',
                                type=feature_geom['type'],
                                coordinates=feature_geom['coordinates'])
            invert_spatial_search = False
            if 'inverted' in feature_properties:
                invert_spatial_search = feature_properties['inverted']
            if invert_spatial_search == True:
                search_query.must_not(geoshape)
            else:
                search_query.must(geoshape)

    if 'fromDate' in temporal_filter and 'toDate' in temporal_filter:
        start_date = None
        end_date = None
        start_year = 'null'
        end_year = 'null'
        # best-effort parsing: an unparseable bound leaves that side open
        # BUG FIX: the bare `except:` clauses also swallowed SystemExit and
        # KeyboardInterrupt; narrowed to Exception while keeping the
        # deliberate best-effort behavior.
        try:
            sd = FlexiDate.from_str(temporal_filter['fromDate'])
            # milliseconds since epoch, approximating a year as 31556952 s
            start_date = int((sd.as_float() - 1970) * 31556952 * 1000)
            start_year = sd.year
        except Exception:
            pass
        try:
            ed = FlexiDate.from_str(temporal_filter['toDate'])
            end_date = int((ed.as_float() - 1970) * 31556952 * 1000)
            end_year = ed.year
        except Exception:
            pass

        # add filter for concepts that define min or max dates
        sql = None
        basesql = """
            SELECT value.conceptid
            FROM (
                SELECT {select_clause}, v.conceptid
                FROM public."values" v, public."values" v2
                WHERE v.conceptid = v2.conceptid
                and v.valuetype = 'min_year'
                and v2.valuetype = 'max_year'
            ) as value
            WHERE overlap = true;
        """
        temporal_query = Bool()
        if 'inverted' not in temporal_filter:
            temporal_filter['inverted'] = False

        if temporal_filter['inverted']:
            # inverted date searches need to use an OR clause and are
            # generally more complicated to structure (can't use ES must_not)
            # eg: less than START_DATE OR greater than END_DATE
            select_clause = []
            inverted_date_filter = Bool()
            field = 'dates'
            if 'dateNodeId' in temporal_filter and temporal_filter[
                    'dateNodeId'] != '':
                field = 'tiles.data.%s' % (temporal_filter['dateNodeId'])
            if start_date is not None:
                inverted_date_filter.should(Range(field=field,
                                                  lte=start_date))
                select_clause.append(
                    "(numrange(v.value::int, v2.value::int, '[]') && numrange(null,{start_year},'[]'))"
                )
            if end_date is not None:
                inverted_date_filter.should(Range(field=field, gte=end_date))
                select_clause.append(
                    "(numrange(v.value::int, v2.value::int, '[]') && numrange({end_year},null,'[]'))"
                )
            if 'dateNodeId' in temporal_filter and temporal_filter[
                    'dateNodeId'] != '':
                date_range_query = Nested(path='tiles',
                                          query=inverted_date_filter)
                temporal_query.should(date_range_query)
            else:
                temporal_query.should(inverted_date_filter)
            select_clause = " or ".join(select_clause) + " as overlap"
            sql = basesql.format(select_clause=select_clause).format(
                start_year=start_year, end_year=end_year)
        else:
            if 'dateNodeId' in temporal_filter and temporal_filter[
                    'dateNodeId'] != '':
                # BUG FIX: local was named `range`, shadowing the builtin
                node_date_range = Range(
                    field='tiles.data.%s' % (temporal_filter['dateNodeId']),
                    gte=start_date, lte=end_date)
                date_range_query = Nested(path='tiles', query=node_date_range)
                temporal_query.should(date_range_query)
            else:
                date_range_query = Range(field='dates', gte=start_date,
                                         lte=end_date)
                temporal_query.should(date_range_query)
                select_clause = """
                    numrange(v.value::int, v2.value::int, '[]') && numrange({start_year},{end_year},'[]') as overlap
                """
                sql = basesql.format(select_clause=select_clause).format(
                    start_year=start_year, end_year=end_year)

        # sql stays None when a dateNodeId is specified in the non-inverted
        # case
        if sql is not None:
            # NOTE(review): start_year/end_year are interpolated into the SQL
            # via str.format; they come from FlexiDate parsing (ints or the
            # literal 'null'), so injection looks contained, but a
            # parameterized query would be safer — confirm FlexiDate.year
            # can only yield numeric values.
            cursor = connection.cursor()
            cursor.execute(sql)
            ret = [str(row[0]) for row in cursor.fetchall()]
            if len(ret) > 0:
                conceptid_filter = Terms(field='domains.conceptid', terms=ret)
                temporal_query.should(conceptid_filter)
        search_query.must(temporal_query)

    query.add_query(search_query)
    return query