Example #1
def search_terms(request):
    lang = request.GET.get('lang', settings.LANGUAGE_CODE)
    se = SearchEngineFactory().create()
    searchString = request.GET.get('q', '')
    user_is_reviewer = request.user.groups.filter(name='Resource Reviewer').exists()

    i = 0
    ret = {}
    for index in ['terms', 'concepts']:
        query = Query(se, start=0, limit=0)
        boolquery = Bool()
        boolquery.should(Match(field='value', query=searchString.lower(), type='phrase_prefix'))
        boolquery.should(Match(field='value.folded', query=searchString.lower(), type='phrase_prefix'))
        boolquery.should(Match(field='value.folded', query=searchString.lower(), fuzziness='AUTO', prefix_length=settings.SEARCH_TERM_SENSITIVITY))

        if user_is_reviewer is False and index == 'terms':
            boolquery.filter(Terms(field='provisional', terms=['false']))

        query.add_query(boolquery)
        base_agg = Aggregation(name='value_agg', type='terms', field='value.raw', size=settings.SEARCH_DROPDOWN_LENGTH, order={"max_score": "desc"})
        nodegroupid_agg = Aggregation(name='nodegroupid', type='terms', field='nodegroupid')
        top_concept_agg = Aggregation(name='top_concept', type='terms', field='top_concept')
        conceptid_agg = Aggregation(name='conceptid', type='terms', field='conceptid')
        max_score_agg = MaxAgg(name='max_score', script='_score')

        top_concept_agg.add_aggregation(conceptid_agg)
        base_agg.add_aggregation(max_score_agg)
        base_agg.add_aggregation(top_concept_agg)
        base_agg.add_aggregation(nodegroupid_agg)
        query.add_aggregation(base_agg)

        ret[index] = []
        results = query.search(index=index)
        for result in results['aggregations']['value_agg']['buckets']:
            if len(result['top_concept']['buckets']) > 0:
                for top_concept in result['top_concept']['buckets']:
                    top_concept_id = top_concept['key']
                    top_concept_label = get_preflabel_from_conceptid(top_concept['key'], lang)['value']
                    for concept in top_concept['conceptid']['buckets']:
                        ret[index].append({
                            'type': 'concept',
                            'context': top_concept_id,
                            'context_label': top_concept_label,
                            'id': i,
                            'text': result['key'],
                            'value': concept['key']
                        })
                    i = i + 1
            else:
                ret[index].append({
                    'type': 'term',
                    'context': '',
                    'context_label': get_resource_model_label(result),
                    'id': i,
                    'text': result['key'],
                    'value': result['key']
                })
                i = i + 1

    return JSONResponse(ret)
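
For reference, the shape of the JSON payload the view above returns can be read directly off the append calls; the values below are illustrative placeholders, not output from a real deployment:

# Hypothetical response shape for search_terms: one list per index queried.
{
    'terms': [
        {'type': 'term', 'context': '', 'context_label': '<resource model label>', 'id': 0, 'text': 'church', 'value': 'church'}
    ],
    'concepts': [
        {'type': 'concept', 'context': '<top concept id>', 'context_label': '<top concept preflabel>', 'id': 1, 'text': 'Church', 'value': '<concept value id>'}
    ]
}
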
Example #2
def get_resource_bounds(node):
    query = Query(se, start=0, limit=0)
    search_query = Bool()
    query.add_query(search_query)
    query.add_aggregation(GeoBoundsAgg(field='points.point', name='bounds'))
    results = query.search(index='resource', doc_type=[str(node.graph.pk)])
    bounds = results['aggregations']['bounds']['bounds'] if 'bounds' in results['aggregations']['bounds'] else None
    return bounds
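
A minimal usage sketch for get_resource_bounds above, assuming node is an Arches Node instance and se is a search engine instance available at module scope (the function reads it from the enclosing module). The dict returned follows Elasticsearch's geo_bounds aggregation shape:

# Hypothetical call site; `map_extent` is illustrative only.
bounds = get_resource_bounds(node)
if bounds is not None:
    map_extent = [
        bounds['top_left']['lon'], bounds['bottom_right']['lat'],    # west, south
        bounds['bottom_right']['lon'], bounds['top_left']['lat'],    # east, north
    ]
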
Example #3
def search_terms(request):
    lang = request.GET.get('lang', settings.LANGUAGE_CODE)
    se = SearchEngineFactory().create()
    searchString = request.GET.get('q', '')
    query = Query(se, start=0, limit=0)
    user_is_reviewer = request.user.groups.filter(name='Resource Reviewer').exists()

    boolquery = Bool()
    boolquery.should(Match(field='value', query=searchString.lower(), type='phrase_prefix', fuzziness='AUTO'))
    boolquery.should(Match(field='value.folded', query=searchString.lower(), type='phrase_prefix', fuzziness='AUTO'))
    boolquery.should(Match(field='value.folded', query=searchString.lower(), fuzziness='AUTO'))

    if user_is_reviewer is False:
        boolquery.filter(Terms(field='provisional', terms=['false']))

    query.add_query(boolquery)
    base_agg = Aggregation(name='value_agg', type='terms', field='value.raw', size=settings.SEARCH_DROPDOWN_LENGTH, order={"max_score": "desc"})
    nodegroupid_agg = Aggregation(name='nodegroupid', type='terms', field='nodegroupid')
    top_concept_agg = Aggregation(name='top_concept', type='terms', field='top_concept')
    conceptid_agg = Aggregation(name='conceptid', type='terms', field='conceptid')
    max_score_agg = MaxAgg(name='max_score', script='_score')

    top_concept_agg.add_aggregation(conceptid_agg)
    base_agg.add_aggregation(max_score_agg)
    base_agg.add_aggregation(top_concept_agg)
    base_agg.add_aggregation(nodegroupid_agg)
    query.add_aggregation(base_agg)
    results = query.search(index='strings') or {'hits': {'hits':[]}}

    i = 0
    ret = []
    for result in results['aggregations']['value_agg']['buckets']:
        if len(result['top_concept']['buckets']) > 0:
            for top_concept in result['top_concept']['buckets']:
                top_concept_id = top_concept['key']
                top_concept_label = get_preflabel_from_conceptid(top_concept['key'], lang)['value']
                for concept in top_concept['conceptid']['buckets']:
                    ret.append({
                        'type': 'concept',
                        'context': top_concept_id,
                        'context_label': top_concept_label,
                        'id': i,
                        'text': result['key'],
                        'value': concept['key']
                    })
                i = i + 1
        else:
            ret.append({
                'type': 'term',
                'context': '',
                'context_label': get_resource_model_label(result),
                'id': i,
                'text': result['key'],
                'value': result['key']
            })
            i = i + 1

    return JSONResponse(ret)
Example #4
def search_terms(request):
    lang = request.GET.get('lang', settings.LANGUAGE_CODE)
    se = SearchEngineFactory().create()
    searchString = request.GET.get('q', '')
    query = Query(se, start=0, limit=0)

    boolquery = Bool()
    boolquery.should(Match(field='value', query=searchString.lower(), type='phrase_prefix', fuzziness='AUTO'))
    boolquery.should(Match(field='value.folded', query=searchString.lower(), type='phrase_prefix', fuzziness='AUTO'))
    boolquery.should(Match(field='value.folded', query=searchString.lower(), fuzziness='AUTO'))
    query.add_query(boolquery)

    base_agg = Aggregation(name='value_agg', type='terms', field='value.raw', size=settings.SEARCH_DROPDOWN_LENGTH, order={"max_score": "desc"})
    nodegroupid_agg = Aggregation(name='nodegroupid', type='terms', field='nodegroupid')
    top_concept_agg = Aggregation(name='top_concept', type='terms', field='top_concept')
    conceptid_agg = Aggregation(name='conceptid', type='terms', field='conceptid')
    max_score_agg = MaxAgg(name='max_score', script='_score')

    top_concept_agg.add_aggregation(conceptid_agg)
    base_agg.add_aggregation(max_score_agg)
    base_agg.add_aggregation(top_concept_agg)
    base_agg.add_aggregation(nodegroupid_agg)
    query.add_aggregation(base_agg)

    results = query.search(index='strings') or {'hits': {'hits':[]}}

    i = 0
    ret = []
    for result in results['aggregations']['value_agg']['buckets']:
        if len(result['top_concept']['buckets']) > 0:
            for top_concept in result['top_concept']['buckets']:
                top_concept_id = top_concept['key']
                top_concept_label = get_preflabel_from_conceptid(top_concept['key'], lang)['value']
                for concept in top_concept['conceptid']['buckets']:
                    ret.append({
                        'type': 'concept',
                        'context': top_concept_id,
                        'context_label': top_concept_label,
                        'id': i,
                        'text': result['key'],
                        'value': concept['key']
                    })
                i = i + 1
        else:
            ret.append({
                'type': 'term',
                'context': '',
                'context_label': '',
                'id': i,
                'text': result['key'],
                'value': result['key']
            })
            i = i + 1

    return JSONResponse(ret)
Example #5
def get_resource_bounds(node):
    query = Query(se, start=0, limit=0)
    search_query = Bool()
    query.add_query(search_query)
    query.add_aggregation(GeoBoundsAgg(field='points.point', name='bounds'))
    results = query.search(index='resource', doc_type=[str(node.graph_id)])
    bounds = results['aggregations']['bounds']['bounds'] if 'bounds' in results['aggregations']['bounds'] else None
    return bounds
Example #6
def get_resource_bounds(node):
    query = Query(se, start=0, limit=0)
    search_query = Bool()
    query.add_query(search_query)
    query.add_aggregation(GeoBoundsAgg(field="points.point", name="bounds"))
    query.add_query(Term(field="graph_id", term=str(node.graph.graphid)))
    results = query.search(index=RESOURCES_INDEX)
    bounds = results["aggregations"]["bounds"]["bounds"] if "bounds" in results["aggregations"]["bounds"] else None
    return bounds
Example #7
def time_wheel_config(request):
    se = SearchEngineFactory().create()
    query = Query(se, limit=0)
    query.add_aggregation(MinAgg(field='dates', format='y'))
    query.add_aggregation(MaxAgg(field='dates', format='y'))
    results = query.search(index='resource')
    if results is not None and results['aggregations']['min_dates']['value'] is not None and results['aggregations']['max_dates']['value'] is not None:
        min_date = int(results['aggregations']['min_dates']['value_as_string'])
        max_date = int(results['aggregations']['max_dates']['value_as_string'])

        # round min and max date to the nearest 1000 years
        min_date = math.ceil(math.fabs(min_date)/1000)*-1000 if min_date < 0 else math.floor(min_date/1000)*1000
        max_date = math.floor(math.fabs(max_date)/1000)*-1000 if max_date < 0 else math.ceil(max_date/1000)*1000
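        # Illustrative values (not from the source): min_date = -1250 rounds down to -2000
        # and max_date = 1980 rounds up to 2000, so the wheel always spans whole millennia.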

        query = Query(se, limit=0)
        for millennium in range(int(min_date),int(max_date)+1000,1000):
            min_millenium = millennium
            max_millenium = millennium + 1000
            millenium_agg = DateRangeAgg(name="Millennium (%s-%s)"%(min_millenium, max_millenium), field='dates', format='y', min_date=str(min_millenium), max_date=str(max_millenium))

            for century in range(min_millenium,max_millenium,100):
                min_century = century
                max_century = century + 100
                century_aggregation = DateRangeAgg(name="Century (%s-%s)"%(min_century, max_century), field='dates', format='y', min_date=str(min_century), max_date=str(max_century))
                millenium_agg.add_aggregation(century_aggregation)

                for decade in range(min_century,max_century,10):
                    min_decade = decade
                    max_decade = decade + 10
                    decade_aggregation = DateRangeAgg(name="Decade (%s-%s)"%(min_decade, max_decade), field='dates', format='y', min_date=str(min_decade), max_date=str(max_decade))
                    century_aggregation.add_aggregation(decade_aggregation)

            query.add_aggregation(millenium_agg)

        root = d3Item(name='root')
        transformESAggToD3Hierarchy({'buckets':[query.search(index='resource')['aggregations']]}, root)
        return JSONResponse(root, indent=4)
    else:
        return HttpResponseNotFound(_('Error retrieving the time wheel config'))
Example #8
def search_terms(request):
    lang = request.GET.get("lang", settings.LANGUAGE_CODE)
    se = SearchEngineFactory().create()
    searchString = request.GET.get("q", "")
    user_is_reviewer = user_is_resource_reviewer(request.user)

    i = 0
    ret = {}
    for index in ["terms", "concepts"]:
        query = Query(se, start=0, limit=0)
        boolquery = Bool()
        boolquery.should(
            Match(field="value",
                  query=searchString.lower(),
                  type="phrase_prefix"))
        boolquery.should(
            Match(field="value.folded",
                  query=searchString.lower(),
                  type="phrase_prefix"))
        boolquery.should(
            Match(field="value.folded",
                  query=searchString.lower(),
                  fuzziness="AUTO",
                  prefix_length=settings.SEARCH_TERM_SENSITIVITY))

        if user_is_reviewer is False and index == "terms":
            boolquery.filter(Terms(field="provisional", terms=["false"]))

        query.add_query(boolquery)
        base_agg = Aggregation(name="value_agg",
                               type="terms",
                               field="value.raw",
                               size=settings.SEARCH_DROPDOWN_LENGTH,
                               order={"max_score": "desc"})
        nodegroupid_agg = Aggregation(name="nodegroupid",
                                      type="terms",
                                      field="nodegroupid")
        top_concept_agg = Aggregation(name="top_concept",
                                      type="terms",
                                      field="top_concept")
        conceptid_agg = Aggregation(name="conceptid",
                                    type="terms",
                                    field="conceptid")
        max_score_agg = MaxAgg(name="max_score", script="_score")

        top_concept_agg.add_aggregation(conceptid_agg)
        base_agg.add_aggregation(max_score_agg)
        base_agg.add_aggregation(top_concept_agg)
        base_agg.add_aggregation(nodegroupid_agg)
        query.add_aggregation(base_agg)

        ret[index] = []
        results = query.search(index=index)
        if results is not None:
            for result in results["aggregations"]["value_agg"]["buckets"]:
                if len(result["top_concept"]["buckets"]) > 0:
                    for top_concept in result["top_concept"]["buckets"]:
                        top_concept_id = top_concept["key"]
                        top_concept_label = get_preflabel_from_conceptid(
                            top_concept["key"], lang)["value"]
                        for concept in top_concept["conceptid"]["buckets"]:
                            ret[index].append({
                                "type": "concept",
                                "context": top_concept_id,
                                "context_label": top_concept_label,
                                "id": i,
                                "text": result["key"],
                                "value": concept["key"],
                            })
                        i = i + 1
                else:
                    ret[index].append({
                        "type": "term",
                        "context": "",
                        "context_label": get_resource_model_label(result),
                        "id": i,
                        "text": result["key"],
                        "value": result["key"],
                    })
                    i = i + 1

    return JSONResponse(ret)
Example #9
def time_wheel_config(request):
    se = SearchEngineFactory().create()
    query = Query(se, limit=0)
    nested_agg = NestedAgg(path='dates', name='min_max_agg')
    nested_agg.add_aggregation(MinAgg(field='dates.date'))
    nested_agg.add_aggregation(MaxAgg(field='dates.date'))
    query.add_aggregation(nested_agg)
    results = query.search(index='resource')

    if (results is not None
            and results['aggregations']['min_max_agg']['min_dates.date']['value'] is not None
            and results['aggregations']['min_max_agg']['max_dates.date']['value'] is not None):
        min_date = int(results['aggregations']['min_max_agg']['min_dates.date']['value']) / 10000
        max_date = int(results['aggregations']['min_max_agg']['max_dates.date']['value']) / 10000
        # round min and max date to the nearest 1000 years
        min_date = math.ceil(math.fabs(min_date) / 1000) * -1000 if min_date < 0 else math.floor(min_date / 1000) * 1000
        max_date = math.floor(math.fabs(max_date) / 1000) * -1000 if max_date < 0 else math.ceil(max_date / 1000) * 1000
        query = Query(se, limit=0)
        range_lookup = {}

        def gen_range_agg(gte=None, lte=None, permitted_nodegroups=None):
            date_query = Bool()
            date_query.filter(
                Range(field='dates.date',
                      gte=gte,
                      lte=lte,
                      relation='intersects'))
            if permitted_nodegroups:
                date_query.filter(
                    Terms(field='dates.nodegroup_id',
                          terms=permitted_nodegroups))
            date_ranges_query = Bool()
            date_ranges_query.filter(
                Range(field='date_ranges.date_range',
                      gte=gte,
                      lte=lte,
                      relation='intersects'))
            if permitted_nodegroups:
                date_ranges_query.filter(
                    Terms(field='date_ranges.nodegroup_id',
                          terms=permitted_nodegroups))
            wrapper_query = Bool()
            wrapper_query.should(
                Nested(path='date_ranges', query=date_ranges_query))
            wrapper_query.should(Nested(path='dates', query=date_query))
            return wrapper_query

        for millennium in range(int(min_date), int(max_date) + 1000, 1000):
            min_millenium = millennium
            max_millenium = millennium + 1000
            millenium_name = "Millennium (%s - %s)" % (min_millenium,
                                                       max_millenium)
            mill_boolquery = gen_range_agg(
                gte=ExtendedDateFormat(min_millenium).lower,
                lte=ExtendedDateFormat(max_millenium).lower,
                permitted_nodegroups=get_permitted_nodegroups(request.user))
            millenium_agg = FiltersAgg(name=millenium_name)
            millenium_agg.add_filter(mill_boolquery)
            range_lookup[millenium_name] = [min_millenium, max_millenium]

            for century in range(min_millenium, max_millenium, 100):
                min_century = century
                max_century = century + 100
                century_name = "Century (%s - %s)" % (min_century, max_century)
                cent_boolquery = gen_range_agg(
                    gte=ExtendedDateFormat(min_century).lower,
                    lte=ExtendedDateFormat(max_century).lower)
                century_agg = FiltersAgg(name=century_name)
                century_agg.add_filter(cent_boolquery)
                millenium_agg.add_aggregation(century_agg)
                range_lookup[century_name] = [min_century, max_century]

                for decade in range(min_century, max_century, 10):
                    min_decade = decade
                    max_decade = decade + 10
                    decade_name = "Decade (%s - %s)" % (min_decade, max_decade)
                    dec_boolquery = gen_range_agg(
                        gte=ExtendedDateFormat(min_decade).lower,
                        lte=ExtendedDateFormat(max_decade).lower)
                    decade_agg = FiltersAgg(name=decade_name)
                    decade_agg.add_filter(dec_boolquery)
                    century_agg.add_aggregation(decade_agg)
                    range_lookup[decade_name] = [min_decade, max_decade]

            query.add_aggregation(millenium_agg)

        root = d3Item(name='root')
        results = {'buckets': [query.search(index='resource')['aggregations']]}
        results_with_ranges = appendDateRanges(results, range_lookup)
        transformESAggToD3Hierarchy(results_with_ranges, root)
        return JSONResponse(root, indent=4)
    else:
        return HttpResponseNotFound(
            _('Error retrieving the time wheel config'))
Example #10
def build_search_results_dsl(request):
    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(
        request.GET.get('mapFilter', '{}'))
    include_provisional = get_provisional_type(request)

    export = request.GET.get('export', None)
    mobile_download = request.GET.get('mobiledownload', None)
    page = 1 if request.GET.get('page') == '' else int(
        request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(
        request.GET.get('temporalFilter', '{}'))
    advanced_filters = JSONDeserializer().deserialize(
        request.GET.get('advanced', '[]'))
    search_buffer = None
    se = SearchEngineFactory().create()

    if export != None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    elif mobile_download != None:
        limit = settings.MOBILE_DOWNLOAD_RESOURCE_LIMIT
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit * int(page - 1), limit=limit)
    search_query = Bool()

    nested_agg = NestedAgg(path='points', name='geo_aggs')
    nested_agg_filter = FiltersAgg(name='inner')

    if include_provisional == True:
        nested_agg_filter.add_filter(
            Terms(field='points.provisional', terms=['false', 'true']))

    else:
        provisional_resource_filter = Bool()

        if include_provisional == False:
            provisional_resource_filter.filter(
                Terms(field='provisional', terms=['false', 'partial']))
            nested_agg_filter.add_filter(
                Terms(field='points.provisional', terms=['false']))

        elif include_provisional == 'only provisional':
            provisional_resource_filter.filter(
                Terms(field='provisional', terms=['true', 'partial']))
            nested_agg_filter.add_filter(
                Terms(field='points.provisional', terms=['true']))

        search_query.must(provisional_resource_filter)

    nested_agg_filter.add_aggregation(
        GeoHashGridAgg(field='points.point',
                       name='grid',
                       precision=settings.HEX_BIN_PRECISION))
    nested_agg_filter.add_aggregation(
        GeoBoundsAgg(field='points.point', name='bounds'))
    nested_agg.add_aggregation(nested_agg_filter)
    query.add_aggregation(nested_agg)

    permitted_nodegroups = get_permitted_nodegroups(request.user)

    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            term_query = Bool()
            provisional_term_filter = Bool()
            if term['type'] == 'term' or term['type'] == 'string':
                string_filter = Bool()
                if term['type'] == 'term':
                    string_filter.must(
                        Match(field='strings.string',
                              query=term['value'],
                              type='phrase'))
                elif term['type'] == 'string':
                    string_filter.should(
                        Match(field='strings.string',
                              query=term['value'],
                              type='phrase_prefix'))
                    string_filter.should(
                        Match(field='strings.string.folded',
                              query=term['value'],
                              type='phrase_prefix'))

                if include_provisional == False:
                    string_filter.must_not(
                        Match(field='strings.provisional',
                              query='true',
                              type='phrase'))
                elif include_provisional == 'only provisional':
                    string_filter.must_not(
                        Match(field='strings.provisional',
                              query='false',
                              type='phrase'))

                string_filter.filter(
                    Terms(field='strings.nodegroup_id',
                          terms=permitted_nodegroups))
                nested_string_filter = Nested(path='strings',
                                              query=string_filter)
                if term['inverted']:
                    search_query.must_not(nested_string_filter)
                else:
                    search_query.must(nested_string_filter)
                    # need to set min_score because the query returns results with score 0 and those have to be removed, which I don't think it should be doing
                    query.min_score('0.01')
            elif term['type'] == 'concept':
                concept_ids = _get_child_concepts(term['value'])
                conceptid_filter = Bool()
                conceptid_filter.filter(
                    Terms(field='domains.conceptid', terms=concept_ids))
                conceptid_filter.filter(
                    Terms(field='domains.nodegroup_id',
                          terms=permitted_nodegroups))

                if include_provisional == False:
                    conceptid_filter.must_not(
                        Match(field='domains.provisional',
                              query='true',
                              type='phrase'))
                elif include_provisional == 'only provisional':
                    conceptid_filter.must_not(
                        Match(field='domains.provisional',
                              query='false',
                              type='phrase'))

                nested_conceptid_filter = Nested(path='domains',
                                                 query=conceptid_filter)
                if term['inverted']:
                    search_query.must_not(nested_conceptid_filter)
                else:
                    search_query.filter(nested_conceptid_filter)

    if 'features' in spatial_filter:

        if len(spatial_filter['features']) > 0:
            feature_geom = spatial_filter['features'][0]['geometry']
            feature_properties = {}
            if 'properties' in spatial_filter['features'][0]:
                feature_properties = spatial_filter['features'][0][
                    'properties']
            buffer = {'width': 0, 'unit': 'ft'}
            if 'buffer' in feature_properties:
                buffer = feature_properties['buffer']
            search_buffer = _buffer(feature_geom, buffer['width'],
                                    buffer['unit'])
            feature_geom = JSONDeserializer().deserialize(search_buffer.json)
            geoshape = GeoShape(field='geometries.geom.features.geometry',
                                type=feature_geom['type'],
                                coordinates=feature_geom['coordinates'])

            invert_spatial_search = False
            if 'inverted' in feature_properties:
                invert_spatial_search = feature_properties['inverted']

            spatial_query = Bool()
            if invert_spatial_search == True:
                spatial_query.must_not(geoshape)
            else:
                spatial_query.filter(geoshape)

            # get the nodegroup_ids that the user has permission to search
            spatial_query.filter(
                Terms(field='geometries.nodegroup_id',
                      terms=permitted_nodegroups))

            if include_provisional == False:
                spatial_query.filter(
                    Terms(field='geometries.provisional', terms=['false']))

            elif include_provisional == 'only provisional':
                spatial_query.filter(
                    Terms(field='geometries.provisional', terms=['true']))

            search_query.filter(Nested(path='geometries', query=spatial_query))

    if 'fromDate' in temporal_filter and 'toDate' in temporal_filter:
        now = str(datetime.utcnow())
        start_date = ExtendedDateFormat(temporal_filter['fromDate'])
        end_date = ExtendedDateFormat(temporal_filter['toDate'])
        date_nodeid = str(
            temporal_filter['dateNodeId']
        ) if 'dateNodeId' in temporal_filter and temporal_filter[
            'dateNodeId'] != '' else None
        query_inverted = False if 'inverted' not in temporal_filter else temporal_filter[
            'inverted']

        temporal_query = Bool()

        if query_inverted:
            # inverted date searches need to use an OR clause and are generally more complicated to structure (can't use ES must_not)
            # eg: less than START_DATE OR greater than END_DATE
            inverted_date_query = Bool()
            inverted_date_ranges_query = Bool()

            if start_date.is_valid():
                inverted_date_query.should(
                    Range(field='dates.date', lt=start_date.lower))
                inverted_date_ranges_query.should(
                    Range(field='date_ranges.date_range', lt=start_date.lower))
            if end_date.is_valid():
                inverted_date_query.should(
                    Range(field='dates.date', gt=end_date.upper))
                inverted_date_ranges_query.should(
                    Range(field='date_ranges.date_range', gt=end_date.upper))

            date_query = Bool()
            date_query.filter(inverted_date_query)
            date_query.filter(
                Terms(field='dates.nodegroup_id', terms=permitted_nodegroups))

            if include_provisional == False:
                date_query.filter(
                    Terms(field='dates.provisional', terms=['false']))

            elif include_provisional == 'only provisional':
                date_query.filter(
                    Terms(field='dates.provisional', terms=['true']))

            if date_nodeid:
                date_query.filter(Term(field='dates.nodeid', term=date_nodeid))
            else:
                date_ranges_query = Bool()
                date_ranges_query.filter(inverted_date_ranges_query)
                date_ranges_query.filter(
                    Terms(field='date_ranges.nodegroup_id',
                          terms=permitted_nodegroups))

                if include_provisional == False:
                    date_ranges_query.filter(
                        Terms(field='date_ranges.provisional',
                              terms=['false']))

                elif include_provisional == 'only provisional':
                    date_ranges_query.filter(
                        Terms(field='date_ranges.provisional', terms=['true']))

                temporal_query.should(
                    Nested(path='date_ranges', query=date_ranges_query))
            temporal_query.should(Nested(path='dates', query=date_query))

        else:
            date_query = Bool()
            date_query.filter(
                Range(field='dates.date',
                      gte=start_date.lower,
                      lte=end_date.upper))
            date_query.filter(
                Terms(field='dates.nodegroup_id', terms=permitted_nodegroups))

            if include_provisional == False:
                date_query.filter(
                    Terms(field='dates.provisional', terms=['false']))
            elif include_provisional == 'only provisional':
                date_query.filter(
                    Terms(field='dates.provisional', terms=['true']))

            if date_nodeid:
                date_query.filter(Term(field='dates.nodeid', term=date_nodeid))
            else:
                date_ranges_query = Bool()
                date_ranges_query.filter(
                    Range(field='date_ranges.date_range',
                          gte=start_date.lower,
                          lte=end_date.upper,
                          relation='intersects'))
                date_ranges_query.filter(
                    Terms(field='date_ranges.nodegroup_id',
                          terms=permitted_nodegroups))

                if include_provisional == False:
                    date_ranges_query.filter(
                        Terms(field='date_ranges.provisional',
                              terms=['false']))
                if include_provisional == 'only provisional':
                    date_ranges_query.filter(
                        Terms(field='date_ranges.provisional', terms=['true']))

                temporal_query.should(
                    Nested(path='date_ranges', query=date_ranges_query))
            temporal_query.should(Nested(path='dates', query=date_query))

        search_query.filter(temporal_query)

    datatype_factory = DataTypeFactory()
    if len(advanced_filters) > 0:
        advanced_query = Bool()
        grouped_query = Bool()
        grouped_queries = [grouped_query]
        for index, advanced_filter in enumerate(advanced_filters):
            tile_query = Bool()
            for key, val in advanced_filter.iteritems():
                if key != 'op':
                    node = models.Node.objects.get(pk=key)
                    if request.user.has_perm('read_nodegroup', node.nodegroup):
                        datatype = datatype_factory.get_instance(node.datatype)
                        datatype.append_search_filters(val, node, tile_query,
                                                       request)
            nested_query = Nested(path='tiles', query=tile_query)
            if advanced_filter['op'] == 'or' and index != 0:
                grouped_query = Bool()
                grouped_queries.append(grouped_query)
            grouped_query.must(nested_query)
        for grouped_query in grouped_queries:
            advanced_query.should(grouped_query)
        search_query.must(advanced_query)

    query.add_query(search_query)
    if search_buffer != None:
        search_buffer = search_buffer.geojson

    return {'query': query, 'search_buffer': search_buffer}
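
A hedged sketch of a caller for build_search_results_dsl above; the index name and the response handling are assumptions, since the function only returns the query object and the optional search buffer:

# Hypothetical caller; the resource index name differs across Arches versions ('resource' vs. 'resources').
dsl = build_search_results_dsl(request)
results = dsl['query'].search(index='resources')
search_buffer = dsl['search_buffer']  # GeoJSON string, or None when no spatial filter was applied
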
Example #11
def time_wheel_config(request):
    se = SearchEngineFactory().create()
    query = Query(se, limit=0)
    nested_agg = NestedAgg(path='dates', name='min_max_agg')
    nested_agg.add_aggregation(MinAgg(field='dates.date'))
    nested_agg.add_aggregation(MaxAgg(field='dates.date'))
    query.add_aggregation(nested_agg)
    results = query.search(index='resource')

    if results is not None and results['aggregations']['min_max_agg']['min_dates.date']['value'] is not None and results['aggregations']['min_max_agg']['max_dates.date']['value'] is not None:
        min_date = int(results['aggregations']['min_max_agg']['min_dates.date']['value'])/10000
        max_date = int(results['aggregations']['min_max_agg']['max_dates.date']['value'])/10000
        # round min and max date to the nearest 1000 years
        min_date = math.ceil(math.fabs(min_date)/1000)*-1000 if min_date < 0 else math.floor(min_date/1000)*1000
        max_date = math.floor(math.fabs(max_date)/1000)*-1000 if max_date < 0 else math.ceil(max_date/1000)*1000
        query = Query(se, limit=0)
        range_lookup = {}

        def gen_range_agg(gte=None, lte=None, permitted_nodegroups=None):
            date_query = Bool()
            date_query.filter(Range(field='dates.date', gte=gte, lte=lte, relation='intersects'))
            if permitted_nodegroups:
                date_query.filter(Terms(field='dates.nodegroup_id', terms=permitted_nodegroups))
            date_ranges_query = Bool()
            date_ranges_query.filter(Range(field='date_ranges.date_range', gte=gte, lte=lte, relation='intersects'))
            if permitted_nodegroups:
                date_ranges_query.filter(Terms(field='date_ranges.nodegroup_id', terms=permitted_nodegroups))
            wrapper_query = Bool()
            wrapper_query.should(Nested(path='date_ranges', query=date_ranges_query))
            wrapper_query.should(Nested(path='dates', query=date_query))
            return wrapper_query

        for millennium in range(int(min_date),int(max_date)+1000,1000):
            min_millenium = millennium
            max_millenium = millennium + 1000
            millenium_name = "Millennium (%s - %s)"%(min_millenium, max_millenium)
            mill_boolquery = gen_range_agg(gte=SortableDate(min_millenium).as_float()-1,
                lte=SortableDate(max_millenium).as_float(),
                permitted_nodegroups=get_permitted_nodegroups(request.user))
            millenium_agg = FiltersAgg(name=millenium_name)
            millenium_agg.add_filter(mill_boolquery)
            range_lookup[millenium_name] = [min_millenium, max_millenium]

            for century in range(min_millenium,max_millenium,100):
                min_century = century
                max_century = century + 100
                century_name="Century (%s - %s)"%(min_century, max_century)
                cent_boolquery = gen_range_agg(gte=SortableDate(min_century).as_float()-1, lte=SortableDate(max_century).as_float())
                century_agg = FiltersAgg(name=century_name)
                century_agg.add_filter(cent_boolquery)
                millenium_agg.add_aggregation(century_agg)
                range_lookup[century_name] = [min_century, max_century]

                for decade in range(min_century,max_century,10):
                    min_decade = decade
                    max_decade = decade + 10
                    decade_name = "Decade (%s - %s)"%(min_decade, max_decade)
                    dec_boolquery = gen_range_agg(gte=SortableDate(min_decade).as_float()-1, lte=SortableDate(max_decade).as_float())
                    decade_agg = FiltersAgg(name=decade_name)
                    decade_agg.add_filter(dec_boolquery)
                    century_agg.add_aggregation(decade_agg)
                    range_lookup[decade_name] = [min_decade, max_decade]

            query.add_aggregation(millenium_agg)

        root = d3Item(name='root')
        results = {'buckets':[query.search(index='resource')['aggregations']]}
        results_with_ranges = appendDateRanges(results, range_lookup)
        transformESAggToD3Hierarchy(results_with_ranges, root)
        return JSONResponse(root, indent=4)
    else:
        return HttpResponseNotFound(_('Error retrieving the time wheel config'))
Example #12
def build_search_results_dsl(request):
    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(request.GET.get('mapFilter', '{}'))
    export = request.GET.get('export', None)
    page = 1 if request.GET.get('page') == '' else int(request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(request.GET.get('temporalFilter', '{}'))
    advanced_filters = JSONDeserializer().deserialize(request.GET.get('advanced', '[]'))
    search_buffer = None
    se = SearchEngineFactory().create()

    if export != None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit*int(page-1), limit=limit)
    nested_agg = NestedAgg(path='points', name='geo_aggs')
    nested_agg.add_aggregation(GeoHashGridAgg(field='points.point', name='grid', precision=settings.HEX_BIN_PRECISION))
    nested_agg.add_aggregation(GeoBoundsAgg(field='points.point', name='bounds'))
    query.add_aggregation(nested_agg)

    search_query = Bool()
    permitted_nodegroups = get_permitted_nodegroups(request.user)

    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            term_query = Bool()
            if term['type'] == 'term' or term['type'] == 'string':
                string_filter = Bool()
                if term['type'] == 'term':
                    string_filter.must(Match(field='strings.string', query=term['value'], type='phrase'))
                elif term['type'] == 'string':
                    string_filter.should(Match(field='strings.string', query=term['value'], type='phrase_prefix'))
                    string_filter.should(Match(field='strings.string.folded', query=term['value'], type='phrase_prefix'))

                string_filter.filter(Terms(field='strings.nodegroup_id', terms=permitted_nodegroups))
                nested_string_filter = Nested(path='strings', query=string_filter)
                if term['inverted']:
                    search_query.must_not(nested_string_filter)
                else:
                    search_query.must(nested_string_filter)
                    # need to set min_score because the query returns results with score 0 and those have to be removed, which I don't think it should be doing
                    query.min_score('0.01')
            elif term['type'] == 'concept':
                concept_ids = _get_child_concepts(term['value'])
                conceptid_filter = Bool()
                conceptid_filter.filter(Terms(field='domains.conceptid', terms=concept_ids))
                conceptid_filter.filter(Terms(field='domains.nodegroup_id', terms=permitted_nodegroups))
                nested_conceptid_filter = Nested(path='domains', query=conceptid_filter)
                if term['inverted']:
                    search_query.must_not(nested_conceptid_filter)
                else:
                    search_query.filter(nested_conceptid_filter)

    if 'features' in spatial_filter:
        if len(spatial_filter['features']) > 0:
            feature_geom = spatial_filter['features'][0]['geometry']
            feature_properties = spatial_filter['features'][0]['properties']
            buffer = {'width':0,'unit':'ft'}
            if 'buffer' in feature_properties:
                buffer = feature_properties['buffer']
            search_buffer = _buffer(feature_geom, buffer['width'], buffer['unit'])
            feature_geom = JSONDeserializer().deserialize(search_buffer.json)
            geoshape = GeoShape(field='geometries.geom.features.geometry', type=feature_geom['type'], coordinates=feature_geom['coordinates'] )

            invert_spatial_search = False
            if 'inverted' in feature_properties:
                invert_spatial_search = feature_properties['inverted']

            spatial_query = Bool()
            if invert_spatial_search == True:
                spatial_query.must_not(geoshape)
            else:
                spatial_query.filter(geoshape)

            # get the nodegroup_ids that the user has permission to search
            spatial_query.filter(Terms(field='geometries.nodegroup_id', terms=permitted_nodegroups))
            search_query.filter(Nested(path='geometries', query=spatial_query))

    if 'fromDate' in temporal_filter and 'toDate' in temporal_filter:
        now = str(datetime.utcnow())
        start_date = SortableDate(temporal_filter['fromDate'])
        end_date = SortableDate(temporal_filter['toDate'])
        date_nodeid = str(temporal_filter['dateNodeId']) if 'dateNodeId' in temporal_filter and temporal_filter['dateNodeId'] != '' else None
        query_inverted = False if 'inverted' not in temporal_filter else temporal_filter['inverted']

        temporal_query = Bool()

        if query_inverted:
            # inverted date searches need to use an OR clause and are generally more complicated to structure (can't use ES must_not)
            # eg: less than START_DATE OR greater than END_DATE
            inverted_date_query = Bool()
            inverted_date_ranges_query = Bool()

            if start_date.is_valid():
                inverted_date_query.should(Range(field='dates.date', lt=start_date.as_float()))
                inverted_date_ranges_query.should(Range(field='date_ranges.date_range', lt=start_date.as_float()))
            if end_date.is_valid():
                inverted_date_query.should(Range(field='dates.date', gt=end_date.as_float()))
                inverted_date_ranges_query.should(Range(field='date_ranges.date_range', gt=end_date.as_float()))

            date_query = Bool()
            date_query.filter(inverted_date_query)
            date_query.filter(Terms(field='dates.nodegroup_id', terms=permitted_nodegroups))
            if date_nodeid:
                date_query.filter(Term(field='dates.nodeid', term=date_nodeid))
            else:
                date_ranges_query = Bool()
                date_ranges_query.filter(inverted_date_ranges_query)
                date_ranges_query.filter(Terms(field='date_ranges.nodegroup_id', terms=permitted_nodegroups))
                temporal_query.should(Nested(path='date_ranges', query=date_ranges_query))
            temporal_query.should(Nested(path='dates', query=date_query))

        else:
            date_query = Bool()
            date_query.filter(Range(field='dates.date', gte=start_date.as_float(), lte=end_date.as_float()))
            date_query.filter(Terms(field='dates.nodegroup_id', terms=permitted_nodegroups))
            if date_nodeid:
                date_query.filter(Term(field='dates.nodeid', term=date_nodeid))
            else:
                date_ranges_query = Bool()
                date_ranges_query.filter(Range(field='date_ranges.date_range', gte=start_date.as_float(), lte=end_date.as_float(), relation='intersects'))
                date_ranges_query.filter(Terms(field='date_ranges.nodegroup_id', terms=permitted_nodegroups))
                temporal_query.should(Nested(path='date_ranges', query=date_ranges_query))
            temporal_query.should(Nested(path='dates', query=date_query))


        search_query.filter(temporal_query)
        #print search_query.dsl

    datatype_factory = DataTypeFactory()
    if len(advanced_filters) > 0:
        advanced_query = Bool()
        grouped_query = Bool()
        grouped_queries = [grouped_query]
        for index, advanced_filter in enumerate(advanced_filters):
            tile_query = Bool()
            for key, val in advanced_filter.iteritems():
                if key != 'op':
                    node = models.Node.objects.get(pk=key)
                    if request.user.has_perm('read_nodegroup', node.nodegroup):
                        datatype = datatype_factory.get_instance(node.datatype)
                        datatype.append_search_filters(val, node, tile_query, request)
            nested_query = Nested(path='tiles', query=tile_query)
            if advanced_filter['op'] == 'or' and index != 0:
                grouped_query = Bool()
                grouped_queries.append(grouped_query)
            grouped_query.must(nested_query)
        for grouped_query in grouped_queries:
            advanced_query.should(grouped_query)
        search_query.must(advanced_query)

    query.add_query(search_query)
    if search_buffer != None:
        search_buffer = search_buffer.geojson
    return {'query': query, 'search_buffer':search_buffer}
Example #13
    def time_wheel_config(self, user):
        se = SearchEngineFactory().create()
        query = Query(se, limit=0)
        nested_agg = NestedAgg(path="dates", name="min_max_agg")
        nested_agg.add_aggregation(MinAgg(field="dates.date"))
        nested_agg.add_aggregation(MaxAgg(field="dates.date"))
        query.add_aggregation(nested_agg)
        results = query.search(index=RESOURCES_INDEX)

        if (results is not None
                and results["aggregations"]["min_max_agg"]["min_dates.date"]["value"] is not None
                and results["aggregations"]["min_max_agg"]["max_dates.date"]["value"] is not None):
            min_date = int(results["aggregations"]["min_max_agg"]["min_dates.date"]["value"]) / 10000
            max_date = int(results["aggregations"]["min_max_agg"]["max_dates.date"]["value"]) / 10000
            # round min and max date to the nearest 1000 years
            min_date = math.ceil(math.fabs(min_date) / 1000) * -1000 if min_date < 0 else math.floor(min_date / 1000) * 1000
            max_date = math.floor(math.fabs(max_date) / 1000) * -1000 if max_date < 0 else math.ceil(max_date / 1000) * 1000
            query = Query(se, limit=0)
            range_lookup = {}

            def gen_range_agg(gte=None, lte=None, permitted_nodegroups=None):
                date_query = Bool()
                date_query.filter(
                    Range(field="dates.date",
                          gte=gte,
                          lte=lte,
                          relation="intersects"))
                if permitted_nodegroups is not None:
                    date_query.filter(
                        Terms(field="dates.nodegroup_id",
                              terms=permitted_nodegroups))
                date_ranges_query = Bool()
                date_ranges_query.filter(
                    Range(field="date_ranges.date_range",
                          gte=gte,
                          lte=lte,
                          relation="intersects"))
                if permitted_nodegroups is not None:
                    date_ranges_query.filter(
                        Terms(field="date_ranges.nodegroup_id",
                              terms=permitted_nodegroups))
                wrapper_query = Bool()
                wrapper_query.should(
                    Nested(path="date_ranges", query=date_ranges_query))
                wrapper_query.should(Nested(path="dates", query=date_query))
                return wrapper_query

            date_tiers = {
                "name": "Millennium",
                "interval": 1000,
                "root": True,
                "child": {
                    "name": "Century",
                    "interval": 100,
                    "child": {
                        "name": "Decade",
                        "interval": 10
                    }
                },
            }

            if abs(int(min_date) - int(max_date)) > 1000:
                date_tiers = {
                    "name": "Millennium",
                    "interval": 1000,
                    "root": True,
                    "child": {
                        "name": "Half-millennium",
                        "interval": 500,
                        "child": {
                            "name": "Century",
                            "interval": 100
                        }
                    },
                }

            if settings.TIMEWHEEL_DATE_TIERS is not None:
                date_tiers = settings.TIMEWHEEL_DATE_TIERS

            def add_date_tier(date_tier,
                              low_date,
                              high_date,
                              previous_period_agg=None):
                interval = date_tier["interval"]
                name = date_tier["name"]
                within_range = True
                if "root" in date_tier:
                    high_date = int(high_date) + interval
                for period in range(int(low_date), int(high_date), interval):
                    min_period = period
                    max_period = period + interval
                    if "range" in date_tier:
                        within_range = min_period >= date_tier["range"][
                            "min"] and max_period <= date_tier["range"]["max"]
                    period_name = "{0} ({1} - {2})".format(
                        name, min_period, max_period)
                    nodegroups = self.get_permitted_nodegroups(
                        user) if "root" in date_tier else None
                    period_boolquery = gen_range_agg(
                        gte=ExtendedDateFormat(min_period).lower,
                        lte=ExtendedDateFormat(max_period).lower,
                        permitted_nodegroups=nodegroups)
                    period_agg = FiltersAgg(name=period_name)
                    period_agg.add_filter(period_boolquery)
                    if "root" not in date_tier:
                        if within_range is True:
                            previous_period_agg.add_aggregation(period_agg)
                    range_lookup[period_name] = [min_period, max_period]
                    if "child" in date_tier:
                        add_date_tier(date_tier["child"], min_period,
                                      max_period, period_agg)
                    if "root" in date_tier:
                        query.add_aggregation(period_agg)

            add_date_tier(date_tiers, min_date, max_date)

            root = d3Item(name="root")
            results = {
                "buckets":
                [query.search(index=RESOURCES_INDEX)["aggregations"]]
            }
            results_with_ranges = self.appendDateRanges(results, range_lookup)
            self.transformESAggToD3Hierarchy(results_with_ranges, root)

            # calculate total number of docs
            for child in root.children:
                root.size = root.size + child.size

            if user.username in settings.CACHE_BY_USER:
                key = "time_wheel_config_{0}".format(user.username)
                cache.set(key, root, settings.CACHE_BY_USER[user.username])

            return root
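
A possible read path for the cache written at the end of the method above, assuming Django's cache framework as already imported in that module; the object holding the method is referred to here by the placeholder name base_search_view:

# Hypothetical cache lookup using the same key format as the method above.
key = "time_wheel_config_{0}".format(user.username)
root = cache.get(key)
if root is None:
    root = base_search_view.time_wheel_config(user)  # placeholder for whatever instance defines the method
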
Example #14
def build_search_results_dsl(request):
    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(request.GET.get('mapFilter', '{}'))
    export = request.GET.get('export', None)
    page = 1 if request.GET.get('page') == '' else int(request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(request.GET.get('temporalFilter', '{}'))

    se = SearchEngineFactory().create()

    if export != None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit*int(page-1), limit=limit)
    query.add_aggregation(GeoHashGridAgg(field='points', name='grid', precision=settings.HEX_BIN_PRECISION))
    query.add_aggregation(GeoBoundsAgg(field='points', name='bounds'))
    search_query = Bool()


    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            if term['type'] == 'term':
                term_filter = Match(field='strings', query=term['value'], type='phrase')
                if term['inverted']:
                    search_query.must_not(term_filter)
                else:
                    search_query.must(term_filter)
            elif term['type'] == 'concept':
                concept_ids = _get_child_concepts(term['value'])
                conceptid_filter = Terms(field='domains.conceptid', terms=concept_ids)
                if term['inverted']:
                    search_query.must_not(conceptid_filter)
                else:
                    search_query.must(conceptid_filter)
            elif term['type'] == 'string':
                string_filter = Bool()
                string_filter.should(Match(field='strings', query=term['value'], type='phrase_prefix'))
                string_filter.should(Match(field='strings.folded', query=term['value'], type='phrase_prefix'))
                if term['inverted']:
                    search_query.must_not(string_filter)
                else:
                    search_query.must(string_filter)

    if 'features' in spatial_filter:
        if len(spatial_filter['features']) > 0:
            feature_geom = spatial_filter['features'][0]['geometry']
            feature_properties = spatial_filter['features'][0]['properties']
            buffer = {'width':0,'unit':'ft'}
            if 'buffer' in feature_properties:
                buffer = feature_properties['buffer']
            feature_geom = JSONDeserializer().deserialize(_buffer(feature_geom,buffer['width'],buffer['unit']).json)
            geoshape = GeoShape(field='geometries.features.geometry', type=feature_geom['type'], coordinates=feature_geom['coordinates'] )

            invert_spatial_search = False
            if 'inverted' in feature_properties:
                invert_spatial_search = feature_properties['inverted']

            if invert_spatial_search == True:
                search_query.must_not(geoshape)
            else:
                search_query.must(geoshape)

    if 'fromDate' in temporal_filter and 'toDate' in temporal_filter:
        now = str(datetime.utcnow())
        start_date = None
        end_date = None
        start_year = 'null'
        end_year = 'null'
        try:
            # start_date = parser.parse(temporal_filter['fromDate'])
            # start_date = start_date.isoformat()
            sd = FlexiDate.from_str(temporal_filter['fromDate'])
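            # convert FlexiDate's year-as-float into milliseconds since the 1970 epoch
            # (31556952 is roughly the number of seconds in an average Gregorian year)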
            start_date = int((sd.as_float()-1970)*31556952*1000)

            #start_year = parser.parse(start_date).year
            start_year = sd.year
        except Exception:
            pass

        try:
            # end_date = parser.parse(temporal_filter['toDate'])
            # end_date = end_date.isoformat()
            ed = FlexiDate.from_str(temporal_filter['toDate'])
            end_date = int((ed.as_float()-1970)*31556952*1000)

            #end_year = parser.parse(end_date).year
            end_year = ed.year
        except Exception:
            pass


        # add filter for concepts that define min or max dates
        sql = None
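        # the SQL below pairs each concept's min_year/max_year values and keeps conceptids whose
        # year range overlaps the requested window; matching concepts are OR'ed into the temporal query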
        basesql = """
            SELECT value.conceptid
            FROM (
                SELECT
                    {select_clause},
                    v.conceptid
                FROM
                    public."values" v,
                    public."values" v2
                WHERE
                    v.conceptid = v2.conceptid and
                    v.valuetype = 'min_year' and
                    v2.valuetype = 'max_year'
            ) as value
            WHERE overlap = true;
        """

        temporal_query = Bool()

        if 'inverted' not in temporal_filter:
            temporal_filter['inverted'] = False

        if temporal_filter['inverted']:
            # inverted date searches need to use an OR clause and are generally more complicated to structure (can't use ES must_not)
            # eg: less than START_DATE OR greater than END_DATE
            select_clause = []
            inverted_date_filter = Bool()

            field = 'dates'
            if 'dateNodeId' in temporal_filter and temporal_filter['dateNodeId'] != '':
                field='tiles.data.%s' % (temporal_filter['dateNodeId'])

            if start_date is not None:
                inverted_date_filter.should(Range(field=field, lte=start_date))
                select_clause.append("(numrange(v.value::int, v2.value::int, '[]') && numrange(null,{start_year},'[]'))")
            if end_date is not None:
                inverted_date_filter.should(Range(field=field, gte=end_date))
                select_clause.append("(numrange(v.value::int, v2.value::int, '[]') && numrange({end_year},null,'[]'))")

            if 'dateNodeId' in temporal_filter and temporal_filter['dateNodeId'] != '':
                date_range_query = Nested(path='tiles', query=inverted_date_filter)
                temporal_query.should(date_range_query)
            else:
                temporal_query.should(inverted_date_filter)

                select_clause = " or ".join(select_clause) + " as overlap"
                sql = basesql.format(select_clause=select_clause).format(start_year=start_year, end_year=end_year)

        else:
            if 'dateNodeId' in temporal_filter and temporal_filter['dateNodeId'] != '':
                date_range = Range(field='tiles.data.%s' % (temporal_filter['dateNodeId']), gte=start_date, lte=end_date)
                date_range_query = Nested(path='tiles', query=date_range)
                temporal_query.should(date_range_query)
            else:
                date_range_query = Range(field='dates', gte=start_date, lte=end_date)
                temporal_query.should(date_range_query)

                select_clause = """
                    numrange(v.value::int, v2.value::int, '[]') && numrange({start_year},{end_year},'[]') as overlap
                """
                sql = basesql.format(select_clause=select_clause).format(start_year=start_year, end_year=end_year)

        # if a dateNodeId is not specified, also include concepts whose min/max year range overlaps the filter
        if sql is not None:
            cursor = connection.cursor()
            cursor.execute(sql)
            ret = [str(row[0]) for row in cursor.fetchall()]

            if len(ret) > 0:
                conceptid_filter = Terms(field='domains.conceptid', terms=ret)
                temporal_query.should(conceptid_filter)
        search_query.must(temporal_query)

    query.add_query(search_query)
    return query
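
A minimal usage sketch (the view name and response shaping are illustrative assumptions; JSONResponse, the 'resource' index and Query.search are taken from the surrounding examples):

def search_results(request):
    # build the DSL from the termFilter / mapFilter / temporalFilter query params
    dsl = build_search_results_dsl(request)
    # execute against the resource index and hand the hits/aggregations back to the client
    results = dsl.search(index='resource')
    return JSONResponse(results)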
Exemple #15
0
def time_wheel_config(request):
    se = SearchEngineFactory().create()
    query = Query(se, limit=0)
    query.add_aggregation(MinAgg(field='dates', format='y'))
    query.add_aggregation(MaxAgg(field='dates', format='y'))
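    # MinAgg/MaxAgg default their aggregation names from the field, hence the
    # 'min_dates' / 'max_dates' keys read back from the aggregations below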
    results = query.search(index='resource')
    if (results is not None
            and results['aggregations']['min_dates']['value'] is not None
            and results['aggregations']['max_dates']['value'] is not None):
        min_date = int(results['aggregations']['min_dates']['value_as_string'])
        max_date = int(results['aggregations']['max_dates']['value_as_string'])

        # round min and max dates outward to whole millennia so the wheel covers full 1000-year bands
        if min_date < 0:
            min_date = math.ceil(math.fabs(min_date) / 1000) * -1000
        else:
            min_date = math.floor(min_date / 1000) * 1000
        if max_date < 0:
            max_date = math.floor(math.fabs(max_date) / 1000) * -1000
        else:
            max_date = math.ceil(max_date / 1000) * 1000

        query = Query(se, limit=0)
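        # build nested millennium -> century -> decade date_range aggregations spanning the rounded range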
        for millennium in range(int(min_date), int(max_date) + 1000, 1000):
            min_millenium = millennium
            max_millenium = millennium + 1000
            millenium_agg = DateRangeAgg(name="Millennium (%s-%s)" %
                                         (min_millenium, max_millenium),
                                         field='dates',
                                         format='y',
                                         min_date=str(min_millenium),
                                         max_date=str(max_millenium))

            for century in range(min_millenium, max_millenium, 100):
                min_century = century
                max_century = century + 100
                century_aggregation = DateRangeAgg(name="Century (%s-%s)" %
                                                   (min_century, max_century),
                                                   field='dates',
                                                   format='y',
                                                   min_date=str(min_century),
                                                   max_date=str(max_century))
                millenium_agg.add_aggregation(century_aggregation)

                for decade in range(min_century, max_century, 10):
                    min_decade = decade
                    max_decade = decade + 10
                    decade_aggregation = DateRangeAgg(name="Decade (%s-%s)" %
                                                      (min_decade, max_decade),
                                                      field='dates',
                                                      format='y',
                                                      min_date=str(min_decade),
                                                      max_date=str(max_decade))
                    century_aggregation.add_aggregation(decade_aggregation)

            query.add_aggregation(millenium_agg)

        root = d3Item(name='root')
        transformESAggToD3Hierarchy(
            {'buckets': [query.search(index='resource')['aggregations']]},
            root)
        return JSONResponse(root, indent=4)
    else:
        return HttpResponseNotFound(
            _('Error retrieving the time wheel config'))
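
The rounding step above snaps the min/max dates outward so the wheel always spans whole millennia; a quick standalone check of that arithmetic (plain Python, no Elasticsearch needed, sample years made up):

import math

def round_outward_to_millennium(min_date, max_date):
    # the start rounds toward earlier dates, the end toward later dates, mirroring the view code above
    min_date = math.ceil(math.fabs(min_date) / 1000) * -1000 if min_date < 0 else math.floor(min_date / 1000) * 1000
    max_date = math.floor(math.fabs(max_date) / 1000) * -1000 if max_date < 0 else math.ceil(max_date / 1000) * 1000
    return int(min_date), int(max_date)

print(round_outward_to_millennium(-1250, 1987))  # (-2000, 2000)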