def append_dsl(self, search_results_object, permitted_nodegroups,
                   include_provisional):
        nested_agg = NestedAgg(path='points', name='geo_aggs')
        nested_agg_filter = FiltersAgg(name='inner')
        geo_agg_filter = Bool()

        if include_provisional is True:
            geo_agg_filter.filter(
                Terms(field='points.provisional', terms=['false', 'true']))

        else:
            if include_provisional is False:
                geo_agg_filter.filter(
                    Terms(field='points.provisional', terms=['false']))

            elif include_provisional is 'only provisional':
                geo_agg_filter.filter(
                    Terms(field='points.provisional', terms=['true']))

        geo_agg_filter.filter(
            Terms(field='points.nodegroup_id', terms=permitted_nodegroups))
        nested_agg_filter.add_filter(geo_agg_filter)
        nested_agg_filter.add_aggregation(
            GeoHashGridAgg(field='points.point',
                           name='grid',
                           precision=settings.HEX_BIN_PRECISION))
        nested_agg_filter.add_aggregation(
            GeoBoundsAgg(field='points.point', name='bounds'))
        nested_agg.add_aggregation(nested_agg_filter)
        search_results_object['query'].add_aggregation(nested_agg)
Beispiel #2
0
    def append_dsl(self, search_results_object, permitted_nodegroups,
                   include_provisional):
        nested_agg = NestedAgg(path="points", name="geo_aggs")
        nested_agg_filter = FiltersAgg(name="inner")
        geo_agg_filter = Bool()

        if include_provisional is True:
            geo_agg_filter.filter(
                Terms(field="points.provisional", terms=["false", "true"]))

        else:
            if include_provisional is False:
                geo_agg_filter.filter(
                    Terms(field="points.provisional", terms=["false"]))

            elif include_provisional is "only provisional":
                geo_agg_filter.filter(
                    Terms(field="points.provisional", terms=["true"]))

        geo_agg_filter.filter(
            Terms(field="points.nodegroup_id", terms=permitted_nodegroups))
        nested_agg_filter.add_filter(geo_agg_filter)
        nested_agg_filter.add_aggregation(
            GeoHashGridAgg(field="points.point",
                           name="grid",
                           precision=settings.HEX_BIN_PRECISION))
        nested_agg_filter.add_aggregation(
            GeoBoundsAgg(field="points.point", name="bounds"))
        nested_agg.add_aggregation(nested_agg_filter)
        search_results_object["query"].add_aggregation(nested_agg)
Beispiel #3
0
 def get_resource_bounds(node):
     query = Query(se, start=0, limit=0)
     search_query = Bool()
     query.add_query(search_query)
     query.add_aggregation(
         GeoBoundsAgg(field='points.point', name='bounds'))
     results = query.search(index='resource',
                            doc_type=[str(node.graph_id)])
     bounds = results['aggregations']['bounds'][
         'bounds'] if 'bounds' in results['aggregations'][
             'bounds'] else None
     return bounds
Beispiel #4
0
 def get_resource_bounds(node):
     query = Query(se, start=0, limit=0)
     search_query = Bool()
     query.add_query(search_query)
     query.add_aggregation(
         GeoBoundsAgg(field="points.point", name="bounds"))
     query.add_query(
         Term(field="graph_id", term=str(node.graph.graphid)))
     results = query.search(index=RESOURCES_INDEX)
     bounds = results["aggregations"]["bounds"][
         "bounds"] if "bounds" in results["aggregations"][
             "bounds"] else None
     return bounds
Beispiel #5
0
def build_search_results_dsl(request):
    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(
        request.GET.get('mapFilter', '{}'))
    include_provisional = get_provisional_type(request)

    export = request.GET.get('export', None)
    mobile_download = request.GET.get('mobiledownload', None)
    page = 1 if request.GET.get('page') == '' else int(
        request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(
        request.GET.get('temporalFilter', '{}'))
    advanced_filters = JSONDeserializer().deserialize(
        request.GET.get('advanced', '[]'))
    search_buffer = None
    se = SearchEngineFactory().create()

    if export != None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    elif mobile_download != None:
        limit = settings.MOBILE_DOWNLOAD_RESOURCE_LIMIT
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit * int(page - 1), limit=limit)
    search_query = Bool()

    nested_agg = NestedAgg(path='points', name='geo_aggs')
    nested_agg_filter = FiltersAgg(name='inner')

    if include_provisional == True:
        nested_agg_filter.add_filter(
            Terms(field='points.provisional', terms=['false', 'true']))

    else:
        provisional_resource_filter = Bool()

        if include_provisional == False:
            provisional_resource_filter.filter(
                Terms(field='provisional', terms=['false', 'partial']))
            nested_agg_filter.add_filter(
                Terms(field='points.provisional', terms=['false']))

        elif include_provisional == 'only provisional':
            provisional_resource_filter.filter(
                Terms(field='provisional', terms=['true', 'partial']))
            nested_agg_filter.add_filter(
                Terms(field='points.provisional', terms=['true']))

        search_query.must(provisional_resource_filter)

    nested_agg_filter.add_aggregation(
        GeoHashGridAgg(field='points.point',
                       name='grid',
                       precision=settings.HEX_BIN_PRECISION))
    nested_agg_filter.add_aggregation(
        GeoBoundsAgg(field='points.point', name='bounds'))
    nested_agg.add_aggregation(nested_agg_filter)
    query.add_aggregation(nested_agg)

    permitted_nodegroups = get_permitted_nodegroups(request.user)

    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            term_query = Bool()
            provisional_term_filter = Bool()
            if term['type'] == 'term' or term['type'] == 'string':
                string_filter = Bool()
                if term['type'] == 'term':
                    string_filter.must(
                        Match(field='strings.string',
                              query=term['value'],
                              type='phrase'))
                elif term['type'] == 'string':
                    string_filter.should(
                        Match(field='strings.string',
                              query=term['value'],
                              type='phrase_prefix'))
                    string_filter.should(
                        Match(field='strings.string.folded',
                              query=term['value'],
                              type='phrase_prefix'))

                if include_provisional == False:
                    string_filter.must_not(
                        Match(field='strings.provisional',
                              query='true',
                              type='phrase'))
                elif include_provisional == 'only provisional':
                    string_filter.must_not(
                        Match(field='strings.provisional',
                              query='false',
                              type='phrase'))

                string_filter.filter(
                    Terms(field='strings.nodegroup_id',
                          terms=permitted_nodegroups))
                nested_string_filter = Nested(path='strings',
                                              query=string_filter)
                if term['inverted']:
                    search_query.must_not(nested_string_filter)
                else:
                    search_query.must(nested_string_filter)
                    # need to set min_score because the query returns results with score 0 and those have to be removed, which I don't think it should be doing
                    query.min_score('0.01')
            elif term['type'] == 'concept':
                concept_ids = _get_child_concepts(term['value'])
                conceptid_filter = Bool()
                conceptid_filter.filter(
                    Terms(field='domains.conceptid', terms=concept_ids))
                conceptid_filter.filter(
                    Terms(field='domains.nodegroup_id',
                          terms=permitted_nodegroups))

                if include_provisional == False:
                    conceptid_filter.must_not(
                        Match(field='domains.provisional',
                              query='true',
                              type='phrase'))
                elif include_provisional == 'only provisional':
                    conceptid_filter.must_not(
                        Match(field='domains.provisional',
                              query='false',
                              type='phrase'))

                nested_conceptid_filter = Nested(path='domains',
                                                 query=conceptid_filter)
                if term['inverted']:
                    search_query.must_not(nested_conceptid_filter)
                else:
                    search_query.filter(nested_conceptid_filter)

    if 'features' in spatial_filter:

        if len(spatial_filter['features']) > 0:
            feature_geom = spatial_filter['features'][0]['geometry']
            feature_properties = {}
            if 'properties' in spatial_filter['features'][0]:
                feature_properties = spatial_filter['features'][0][
                    'properties']
            buffer = {'width': 0, 'unit': 'ft'}
            if 'buffer' in feature_properties:
                buffer = feature_properties['buffer']
            search_buffer = _buffer(feature_geom, buffer['width'],
                                    buffer['unit'])
            feature_geom = JSONDeserializer().deserialize(search_buffer.json)
            geoshape = GeoShape(field='geometries.geom.features.geometry',
                                type=feature_geom['type'],
                                coordinates=feature_geom['coordinates'])

            invert_spatial_search = False
            if 'inverted' in feature_properties:
                invert_spatial_search = feature_properties['inverted']

            spatial_query = Bool()
            if invert_spatial_search == True:
                spatial_query.must_not(geoshape)
            else:
                spatial_query.filter(geoshape)

            # get the nodegroup_ids that the user has permission to search
            spatial_query.filter(
                Terms(field='geometries.nodegroup_id',
                      terms=permitted_nodegroups))

            if include_provisional == False:
                spatial_query.filter(
                    Terms(field='geometries.provisional', terms=['false']))

            elif include_provisional == 'only provisional':
                spatial_query.filter(
                    Terms(field='geometries.provisional', terms=['true']))

            search_query.filter(Nested(path='geometries', query=spatial_query))

    if 'fromDate' in temporal_filter and 'toDate' in temporal_filter:
        now = str(datetime.utcnow())
        start_date = ExtendedDateFormat(temporal_filter['fromDate'])
        end_date = ExtendedDateFormat(temporal_filter['toDate'])
        date_nodeid = str(
            temporal_filter['dateNodeId']
        ) if 'dateNodeId' in temporal_filter and temporal_filter[
            'dateNodeId'] != '' else None
        query_inverted = False if 'inverted' not in temporal_filter else temporal_filter[
            'inverted']

        temporal_query = Bool()

        if query_inverted:
            # inverted date searches need to use an OR clause and are generally more complicated to structure (can't use ES must_not)
            # eg: less than START_DATE OR greater than END_DATE
            inverted_date_query = Bool()
            inverted_date_ranges_query = Bool()

            if start_date.is_valid():
                inverted_date_query.should(
                    Range(field='dates.date', lt=start_date.lower))
                inverted_date_ranges_query.should(
                    Range(field='date_ranges.date_range', lt=start_date.lower))
            if end_date.is_valid():
                inverted_date_query.should(
                    Range(field='dates.date', gt=end_date.upper))
                inverted_date_ranges_query.should(
                    Range(field='date_ranges.date_range', gt=end_date.upper))

            date_query = Bool()
            date_query.filter(inverted_date_query)
            date_query.filter(
                Terms(field='dates.nodegroup_id', terms=permitted_nodegroups))

            if include_provisional == False:
                date_query.filter(
                    Terms(field='dates.provisional', terms=['false']))

            elif include_provisional == 'only provisional':
                date_query.filter(
                    Terms(field='dates.provisional', terms=['true']))

            if date_nodeid:
                date_query.filter(Term(field='dates.nodeid', term=date_nodeid))
            else:
                date_ranges_query = Bool()
                date_ranges_query.filter(inverted_date_ranges_query)
                date_ranges_query.filter(
                    Terms(field='date_ranges.nodegroup_id',
                          terms=permitted_nodegroups))

                if include_provisional == False:
                    date_ranges_query.filter(
                        Terms(field='date_ranges.provisional',
                              terms=['false']))

                elif include_provisional == 'only provisional':
                    date_ranges_query.filter(
                        Terms(field='date_ranges.provisional', terms=['true']))

                temporal_query.should(
                    Nested(path='date_ranges', query=date_ranges_query))
            temporal_query.should(Nested(path='dates', query=date_query))

        else:
            date_query = Bool()
            date_query.filter(
                Range(field='dates.date',
                      gte=start_date.lower,
                      lte=end_date.upper))
            date_query.filter(
                Terms(field='dates.nodegroup_id', terms=permitted_nodegroups))

            if include_provisional == False:
                date_query.filter(
                    Terms(field='dates.provisional', terms=['false']))
            elif include_provisional == 'only provisional':
                date_query.filter(
                    Terms(field='dates.provisional', terms=['true']))

            if date_nodeid:
                date_query.filter(Term(field='dates.nodeid', term=date_nodeid))
            else:
                date_ranges_query = Bool()
                date_ranges_query.filter(
                    Range(field='date_ranges.date_range',
                          gte=start_date.lower,
                          lte=end_date.upper,
                          relation='intersects'))
                date_ranges_query.filter(
                    Terms(field='date_ranges.nodegroup_id',
                          terms=permitted_nodegroups))

                if include_provisional == False:
                    date_ranges_query.filter(
                        Terms(field='date_ranges.provisional',
                              terms=['false']))
                if include_provisional == 'only provisional':
                    date_ranges_query.filter(
                        Terms(field='date_ranges.provisional', terms=['true']))

                temporal_query.should(
                    Nested(path='date_ranges', query=date_ranges_query))
            temporal_query.should(Nested(path='dates', query=date_query))

        search_query.filter(temporal_query)

    datatype_factory = DataTypeFactory()
    if len(advanced_filters) > 0:
        advanced_query = Bool()
        grouped_query = Bool()
        grouped_queries = [grouped_query]
        for index, advanced_filter in enumerate(advanced_filters):
            tile_query = Bool()
            for key, val in advanced_filter.iteritems():
                if key != 'op':
                    node = models.Node.objects.get(pk=key)
                    if request.user.has_perm('read_nodegroup', node.nodegroup):
                        datatype = datatype_factory.get_instance(node.datatype)
                        datatype.append_search_filters(val, node, tile_query,
                                                       request)
            nested_query = Nested(path='tiles', query=tile_query)
            if advanced_filter['op'] == 'or' and index != 0:
                grouped_query = Bool()
                grouped_queries.append(grouped_query)
            grouped_query.must(nested_query)
        for grouped_query in grouped_queries:
            advanced_query.should(grouped_query)
        search_query.must(advanced_query)

    query.add_query(search_query)
    if search_buffer != None:
        search_buffer = search_buffer.geojson

    return {'query': query, 'search_buffer': search_buffer}
Beispiel #6
0
def build_search_results_dsl(request):
    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(
        request.GET.get('mapFilter', '{}'))
    export = request.GET.get('export', None)
    page = 1 if request.GET.get('page') == '' else int(
        request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(
        request.GET.get('temporalFilter', '{}'))

    se = SearchEngineFactory().create()

    if export != None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit * int(page - 1), limit=limit)
    query.add_aggregation(
        GeoHashGridAgg(field='points',
                       name='grid',
                       precision=settings.HEX_BIN_PRECISION))
    query.add_aggregation(GeoBoundsAgg(field='points', name='bounds'))
    search_query = Bool()

    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            if term['type'] == 'term':
                term_filter = Match(field='strings',
                                    query=term['value'],
                                    type='phrase')
                if term['inverted']:
                    search_query.must_not(term_filter)
                else:
                    search_query.must(term_filter)
            elif term['type'] == 'concept':
                concept_ids = _get_child_concepts(term['value'])
                conceptid_filter = Terms(field='domains.conceptid',
                                         terms=concept_ids)
                if term['inverted']:
                    search_query.must_not(conceptid_filter)
                else:
                    search_query.must(conceptid_filter)
            elif term['type'] == 'string':
                string_filter = Bool()
                string_filter.should(
                    Match(field='strings',
                          query=term['value'],
                          type='phrase_prefix'))
                string_filter.should(
                    Match(field='strings.folded',
                          query=term['value'],
                          type='phrase_prefix'))
                if term['inverted']:
                    search_query.must_not(string_filter)
                else:
                    search_query.must(string_filter)

    if 'features' in spatial_filter:
        if len(spatial_filter['features']) > 0:
            feature_geom = spatial_filter['features'][0]['geometry']
            feature_properties = spatial_filter['features'][0]['properties']
            buffer = {'width': 0, 'unit': 'ft'}
            if 'buffer' in feature_properties:
                buffer = feature_properties['buffer']
            feature_geom = JSONDeserializer().deserialize(
                _buffer(feature_geom, buffer['width'], buffer['unit']).json)
            geoshape = GeoShape(field='geometries.features.geometry',
                                type=feature_geom['type'],
                                coordinates=feature_geom['coordinates'])

            invert_spatial_search = False
            if 'inverted' in feature_properties:
                invert_spatial_search = feature_properties['inverted']

            if invert_spatial_search == True:
                search_query.must_not(geoshape)
            else:
                search_query.must(geoshape)

    if 'fromDate' in temporal_filter and 'toDate' in temporal_filter:
        now = str(datetime.utcnow())
        start_date = None
        end_date = None
        start_year = 'null'
        end_year = 'null'
        try:
            # start_date = parser.parse(temporal_filter['fromDate'])
            # start_date = start_date.isoformat()
            sd = FlexiDate.from_str(temporal_filter['fromDate'])
            start_date = int((sd.as_float() - 1970) * 31556952 * 1000)

            #start_year = parser.parse(start_date).year
            start_year = sd.year
        except:
            pass

        try:
            # end_date = parser.parse(temporal_filter['toDate'])
            # end_date = end_date.isoformat()
            ed = FlexiDate.from_str(temporal_filter['toDate'])
            end_date = int((ed.as_float() - 1970) * 31556952 * 1000)

            #end_year = parser.parse(end_date).year
            end_year = ed.year
        except:
            pass

        # add filter for concepts that define min or max dates
        sql = None
        basesql = """
            SELECT value.conceptid
            FROM (
                SELECT
                    {select_clause},
                    v.conceptid
                FROM
                    public."values" v,
                    public."values" v2
                WHERE
                    v.conceptid = v2.conceptid and
                    v.valuetype = 'min_year' and
                    v2.valuetype = 'max_year'
            ) as value
            WHERE overlap = true;
        """

        temporal_query = Bool()

        if 'inverted' not in temporal_filter:
            temporal_filter['inverted'] = False

        if temporal_filter['inverted']:
            # inverted date searches need to use an OR clause and are generally more complicated to structure (can't use ES must_not)
            # eg: less than START_DATE OR greater than END_DATE
            select_clause = []
            inverted_date_filter = Bool()

            field = 'dates'
            if 'dateNodeId' in temporal_filter and temporal_filter[
                    'dateNodeId'] != '':
                field = 'tiles.data.%s' % (temporal_filter['dateNodeId'])

            if start_date is not None:
                inverted_date_filter.should(Range(field=field, lte=start_date))
                select_clause.append(
                    "(numrange(v.value::int, v2.value::int, '[]') && numrange(null,{start_year},'[]'))"
                )
            if end_date is not None:
                inverted_date_filter.should(Range(field=field, gte=end_date))
                select_clause.append(
                    "(numrange(v.value::int, v2.value::int, '[]') && numrange({end_year},null,'[]'))"
                )

            if 'dateNodeId' in temporal_filter and temporal_filter[
                    'dateNodeId'] != '':
                date_range_query = Nested(path='tiles',
                                          query=inverted_date_filter)
                temporal_query.should(date_range_query)
            else:
                temporal_query.should(inverted_date_filter)

                select_clause = " or ".join(select_clause) + " as overlap"
                sql = basesql.format(select_clause=select_clause).format(
                    start_year=start_year, end_year=end_year)

        else:
            if 'dateNodeId' in temporal_filter and temporal_filter[
                    'dateNodeId'] != '':
                range = Range(field='tiles.data.%s' %
                              (temporal_filter['dateNodeId']),
                              gte=start_date,
                              lte=end_date)
                date_range_query = Nested(path='tiles', query=range)
                temporal_query.should(date_range_query)
            else:
                date_range_query = Range(field='dates',
                                         gte=start_date,
                                         lte=end_date)
                temporal_query.should(date_range_query)

                select_clause = """
                    numrange(v.value::int, v2.value::int, '[]') && numrange({start_year},{end_year},'[]') as overlap
                """
                sql = basesql.format(select_clause=select_clause).format(
                    start_year=start_year, end_year=end_year)

        # is a dateNodeId is not specified
        if sql is not None:
            cursor = connection.cursor()
            cursor.execute(sql)
            ret = [str(row[0]) for row in cursor.fetchall()]

            if len(ret) > 0:
                conceptid_filter = Terms(field='domains.conceptid', terms=ret)
                temporal_query.should(conceptid_filter)

        search_query.must(temporal_query)

    query.add_query(search_query)
    return query