Example #1
0
def search_terms(request):
    """
    Return autocomplete suggestions for the text typed into the search box.

    Reads ``q`` (the search text) and ``lang`` (falls back to
    settings.LANGUAGE_CODE) from the request's GET parameters and runs the same
    prefix/fuzzy query against the 'terms' index and the 'concepts' index.
    Provisional values are filtered out of the 'terms' index unless the
    requesting user belongs to the 'Resource Reviewer' group.

    Returns a JSONResponse wrapping a dict keyed by index name; each value is a
    list of suggestion dicts with the keys type, context, context_label, id,
    text and value.
    """
    lang = request.GET.get('lang', settings.LANGUAGE_CODE)
    se = SearchEngineFactory().create()
    needle = request.GET.get('q', '').lower()
    user_is_reviewer = request.user.groups.filter(name='Resource Reviewer').exists()

    # The id counter runs across both indexes so ids never repeat between them.
    next_id = 0
    suggestions = {}
    for index in ('terms', 'concepts'):
        query = Query(se, start=0, limit=0)

        matcher = Bool()
        matcher.should(Match(field='value', query=needle, type='phrase_prefix'))
        matcher.should(Match(field='value.folded', query=needle, type='phrase_prefix'))
        matcher.should(Match(field='value.folded', query=needle, fuzziness='AUTO', prefix_length=settings.SEARCH_TERM_SENSITIVITY))

        # Only reviewers may see provisional terms.
        if index == 'terms' and user_is_reviewer is False:
            matcher.filter(Terms(field='provisional', terms=['false']))
        query.add_query(matcher)

        # Bucket the hits by raw value (best score first); inside each value
        # bucket collect the top concepts (with their concept ids) and the
        # nodegroup ids.
        value_agg = Aggregation(name='value_agg', type='terms', field='value.raw', size=settings.SEARCH_DROPDOWN_LENGTH, order={"max_score": "desc"})
        nodegroup_agg = Aggregation(name='nodegroupid', type='terms', field='nodegroupid')
        top_concept_agg = Aggregation(name='top_concept', type='terms', field='top_concept')
        concept_agg = Aggregation(name='conceptid', type='terms', field='conceptid')
        score_agg = MaxAgg(name='max_score', script='_score')

        top_concept_agg.add_aggregation(concept_agg)
        value_agg.add_aggregation(score_agg)
        value_agg.add_aggregation(top_concept_agg)
        value_agg.add_aggregation(nodegroup_agg)
        query.add_aggregation(value_agg)

        entries = []
        suggestions[index] = entries
        response = query.search(index=index)
        for bucket in response['aggregations']['value_agg']['buckets']:
            top_concepts = bucket['top_concept']['buckets']

            if len(top_concepts) == 0:
                # No concept information: a plain term suggestion.
                entries.append({
                    'type': 'term',
                    'context': '',
                    'context_label': get_resource_model_label(bucket),
                    'id': next_id,
                    'text': bucket['key'],
                    'value': bucket['key']
                })
                next_id += 1
                continue

            for top_concept in top_concepts:
                context_id = top_concept['key']
                context_label = get_preflabel_from_conceptid(top_concept['key'], lang)['value']
                for concept in top_concept['conceptid']['buckets']:
                    entries.append({
                        'type': 'concept',
                        'context': context_id,
                        'context_label': context_label,
                        'id': next_id,
                        'text': bucket['key'],
                        'value': concept['key']
                    })
                # The counter advances once per top concept, so every concept
                # listed under the same top concept shares one id.
                next_id += 1

    return JSONResponse(suggestions)
Example #2
0
def get_preflabel_from_conceptid(conceptid, lang):
    """
    Look up the preferred label (prefLabel) of a concept in the concepts index.

    Selection order:
      1. a label whose language equals ``lang`` exactly (returned immediately)
      2. a label sharing the base language of ``lang`` (the part before "-");
         when several hits qualify the last one wins
      3. a label in settings.LANGUAGE_CODE, used only while nothing has been
         selected yet
      4. otherwise the last hit that was inspected, or a record of empty
         strings when the search returned no hits at all

    Returns the "_source" dict of the selected hit (or the blank record).
    NOTE: relies on a search engine instance named ``se`` being available
    outside this function; it is not created here.
    """
    label_query = Query(se)
    label_filter = Bool()
    label_filter.must(Match(field="type", query="prefLabel", type="phrase"))
    label_filter.filter(Terms(field="conceptid", terms=[conceptid]))
    label_query.add_query(label_filter)

    fallback = {
        "category": "",
        "conceptid": "",
        "language": "",
        "value": "",
        "type": "",
        "id": ""
    }
    chosen = None
    for hit in label_query.search(index=CONCEPTS_INDEX)["hits"]["hits"]:
        label = hit["_source"]
        # Remember the most recent hit as the last-resort answer.
        fallback = label
        label_language = label["language"]

        if label_language == lang:
            return label
        if label_language.split("-")[0] == lang.split("-")[0]:
            chosen = label
        if label_language == settings.LANGUAGE_CODE and chosen is None:
            chosen = label

    if chosen is None:
        return fallback
    return chosen
Example #3
0
def get_preflabel_from_conceptid(conceptid, lang):
    """
    Look up the preferred label (prefLabel) of a concept.

    Searches the 'strings' index (doc type 'concept') for prefLabel records of
    ``conceptid`` and selects one of them:
      1. a label whose language equals ``lang`` exactly (returned immediately)
      2. a label sharing the base language of ``lang`` (the part before '-');
         when several hits qualify the last one wins
      3. a label in settings.LANGUAGE_CODE, used only while nothing has been
         selected yet
      4. otherwise the last hit that was inspected, or a record of empty
         strings when the search returned no hits at all

    Returns the '_source' dict of the selected hit (or the blank record).
    """
    ret = None
    default = {
        "category": "",
        "conceptid": "",
        "language": "",
        "value": "",
        "type": "",
        "id": ""
    }
    se = SearchEngineFactory().create()
    query = Query(se)
    bool_query = Bool()
    bool_query.must(Match(field='type', query='prefLabel', type='phrase'))
    bool_query.filter(Terms(field='conceptid', terms=[conceptid]))
    query.add_query(bool_query)
    preflabels = query.search(index='strings', doc_type='concept')['hits']['hits']
    for preflabel in preflabels:
        source = preflabel['_source']
        # keep the most recent hit as the last-resort answer
        default = source
        # get the label in the preferred language, otherwise get the label in the default language
        if source['language'] == lang:
            return source
        if source['language'].split('-')[0] == lang.split('-')[0]:
            ret = source
        # identity test: a dict with a custom __eq__ can never be mistaken for None
        if source['language'] == settings.LANGUAGE_CODE and ret is None:
            ret = source
    return default if ret is None else ret
Example #4
0
    def delete(self, user={}, note=''):
        """
        Remove this resource along with its relationship records and its
        search index documents.

        Steps, in order: delete the ResourceXResource rows reported by
        get_related_resources (requested with limit=1000), delete the term
        documents that reference this resource from the 'strings' index, delete
        the resource document from the 'resource' index, record a 'delete'
        edit and finally delete the database row.

        Keyword Arguments:
        user -- not used by this implementation
        note -- not used by this implementation
        """

        se = SearchEngineFactory().create()

        relationships = self.get_related_resources(lang="en-US", start=0, limit=1000)
        for relationship in relationships['resource_relationships']:
            relation = models.ResourceXResource.objects.get(pk=relationship['resourcexid'])
            relation.delete()

        # find every indexed term that belongs to this resource instance
        term_query = Query(se)
        resource_filter = Bool()
        resource_filter.filter(Terms(field='resourceinstanceid', terms=[self.resourceinstanceid]))
        term_query.add_query(resource_filter)
        term_hits = term_query.search(index='strings', doc_type='term')['hits']['hits']
        for hit in term_hits:
            se.delete(index='strings', doc_type='term', id=hit['_id'])

        # the resource document is stored under its graph id as the doc type
        se.delete(index='resource', doc_type=str(self.graph_id), id=self.resourceinstanceid)

        self.save_edit(edit_type='delete')
        super(Resource, self).delete()
Example #5
0
def get_preflabel_from_conceptid(conceptid, lang):
    """
    Return the preferred label (prefLabel) record of a concept.

    Queries the 'strings' index (doc type 'concept') for the prefLabel records
    of ``conceptid``. An exact match on ``lang`` is returned immediately. If
    there is none, a label in the same base language (the text before '-') is
    preferred, then a label in settings.LANGUAGE_CODE. Failing all of those,
    the last hit inspected is returned, or a record of empty strings when
    there were no hits.

    Returns the '_source' dict of the selected hit (or the blank record).
    """
    ret = None
    default = {
        "category": "",
        "conceptid": "",
        "language": "",
        "value": "",
        "type": "",
        "id": ""
    }
    se = SearchEngineFactory().create()
    query = Query(se)
    bool_query = Bool()
    bool_query.must(Match(field='type', query='prefLabel', type='phrase'))
    bool_query.filter(Terms(field='conceptid', terms=[conceptid]))
    query.add_query(bool_query)
    preflabels = query.search(index='strings', doc_type='concept')['hits']['hits']
    for preflabel in preflabels:
        record = preflabel['_source']
        language = record['language']
        # whatever was seen last is the answer of last resort
        default = record
        # get the label in the preferred language, otherwise get the label in the default language
        if language == lang:
            return record
        if language.split('-')[0] == lang.split('-')[0]:
            ret = record
        # compare against None by identity rather than equality
        if language == settings.LANGUAGE_CODE and ret is None:
            ret = record
    return default if ret is None else ret
Example #6
0
    def delete(self, *args, **kwargs):
        """
        Delete this tile and its child tiles, or record the deletion as a
        provisional edit when the user may not delete the tile outright.

        Keyword Arguments:
        request -- the originating request (optional); its ``user`` decides
                   whether the tile is really deleted. Without a request (or
                   without a user on it) the call is treated like a reviewer's.
        provisional_edit_log_details -- passed through to save_edit

        Remaining positional and keyword arguments are forwarded to the child
        tiles' delete() and to the parent class' delete()/save().
        """
        se = SearchEngineFactory().create()
        request = kwargs.pop('request', None)
        provisional_edit_log_details = kwargs.pop('provisional_edit_log_details', None)
        for tile in self.tiles:
            tile.delete(*args, request=request, **kwargs)

        # Resolve the acting user up front. Previously the "no user" path left
        # user_is_reviewer unassigned, so the check below raised
        # UnboundLocalError whenever delete() was called without a request.
        user = getattr(request, 'user', None)
        if user is None:
            user_is_reviewer = True
        else:
            user_is_reviewer = user.groups.filter(name='Resource Reviewer').exists()

        if user_is_reviewer is True or self.user_owns_provisional(user):
            # remove the terms indexed for this tile
            query = Query(se)
            bool_query = Bool()
            bool_query.filter(Terms(field='tileid', terms=[self.tileid]))
            query.add_query(bool_query)
            results = query.search(index='terms')['hits']['hits']

            for result in results:
                se.delete(index='terms', id=result['_id'])

            self.__preDelete(request)
            # Pass the resolved user (None when there is no request) instead of
            # request.user, which fails when request is None.
            # NOTE(review): assumes save_edit accepts user=None -- confirm.
            self.save_edit(
                user=user,
                edit_type='tile delete',
                old_value=self.data,
                provisional_edit_log_details=provisional_edit_log_details)
            super(Tile, self).delete(*args, **kwargs)
            # the parent resource's index document has to reflect the removal
            resource = Resource.objects.get(resourceinstanceid=self.resourceinstance.resourceinstanceid)
            resource.index()

        else:
            # not allowed to delete: store the deletion as a provisional edit
            self.apply_provisional_edit(user, data={}, action='delete')
            super(Tile, self).save(*args, **kwargs)
Example #7
0
    def delete(self, *args, **kwargs):
        """
        Delete this tile and its child tiles, or record the deletion as a
        provisional edit when the user may not delete the tile outright.

        Keyword Arguments:
        request -- the originating request (optional); its ``user`` decides
                   whether the tile is really deleted. When no user can be read
                   from it the call is treated like a reviewer's.
        provisional_edit_log_details -- passed through to save_edit

        Remaining positional and keyword arguments are forwarded to the child
        tiles' delete() and to the parent class' delete()/save().
        """
        se = SearchEngineFactory().create()
        request = kwargs.pop("request", None)
        provisional_edit_log_details = kwargs.pop("provisional_edit_log_details", None)
        for tile in self.tiles:
            tile.delete(*args, request=request, **kwargs)
        try:
            user = request.user
            user_is_reviewer = user_is_resource_reviewer(user)
        except AttributeError:  # no user
            user = None
            user_is_reviewer = True

        if user_is_reviewer is True or self.user_owns_provisional(user):
            # remove the terms indexed for this tile
            query = Query(se)
            bool_query = Bool()
            bool_query.filter(Terms(field="tileid", terms=[self.tileid]))
            query.add_query(bool_query)
            results = query.search(index="terms")["hits"]["hits"]

            for result in results:
                se.delete(index="terms", id=result["_id"])

            self.__preDelete(request)
            # Use the user resolved above: request.user raised AttributeError
            # here whenever delete() was called without a request, even though
            # that case is explicitly handled by the try/except.
            # NOTE(review): assumes save_edit accepts user=None -- confirm.
            self.save_edit(
                user=user, edit_type="tile delete", old_value=self.data, provisional_edit_log_details=provisional_edit_log_details
            )
            super(Tile, self).delete(*args, **kwargs)
            # the parent resource's index document has to reflect the removal
            resource = Resource.objects.get(resourceinstanceid=self.resourceinstance.resourceinstanceid)
            resource.index()

        else:
            # not allowed to delete: store the deletion as a provisional edit
            self.apply_provisional_edit(user, data={}, action="delete")
            super(Tile, self).save(*args, **kwargs)
Example #8
0
    def append_dsl(self, search_results_object, permitted_nodegroups,
                   include_provisional):
        """
        Add the spatial (map) filter of the current request to the search query.

        The filter is read from the GET parameter named by
        details["componentname"]. Only the first feature of the submitted
        feature collection is used: it is buffered according to its "buffer"
        property (default width 0 ft) and turned into a geo-shape clause on the
        nested "geometries" documents. An "inverted" property that is True
        excludes the shape instead of requiring it.

        Arguments:
        search_results_object -- dict holding the query under "query"; the
            buffered geometry is stored under
            [details["componentname"]]["search_buffer"]
        permitted_nodegroups -- nodegroup ids the user may search
        include_provisional -- False (exclude provisional geometries),
            "only provisional" (require them); any other value applies no
            provisional filter
        """
        search_query = Bool()
        querysting_params = self.request.GET.get(details["componentname"], "")
        spatial_filter = JSONDeserializer().deserialize(querysting_params)

        # Stays None unless a feature was submitted. This replaces the earlier
        # pattern of catching NameError on a never-assigned local.
        feature_geom = None

        if "features" in spatial_filter and len(spatial_filter["features"]) > 0:
            feature = spatial_filter["features"][0]
            feature_geom = feature["geometry"]

            feature_properties = {}
            if "properties" in feature:
                feature_properties = feature["properties"]

            buffer_spec = {"width": 0, "unit": "ft"}
            if "buffer" in feature_properties:
                buffer_spec = feature_properties["buffer"]

            # From here on feature_geom is the buffered geometry as geojson.
            search_buffer = _buffer(feature_geom, buffer_spec["width"],
                                    buffer_spec["unit"])
            feature_geom = JSONDeserializer().deserialize(
                search_buffer.geojson)
            geoshape = GeoShape(field="geometries.geom.features.geometry",
                                type=feature_geom["type"],
                                coordinates=feature_geom["coordinates"])

            invert_spatial_search = False
            if "inverted" in feature_properties:
                invert_spatial_search = feature_properties["inverted"]

            spatial_query = Bool()
            if invert_spatial_search is True:
                spatial_query.must_not(geoshape)
            else:
                spatial_query.filter(geoshape)

            # get the nodegroup_ids that the user has permission to search
            spatial_query.filter(
                Terms(field="geometries.nodegroup_id",
                      terms=permitted_nodegroups))

            if include_provisional is False:
                spatial_query.filter(
                    Terms(field="geometries.provisional", terms=["false"]))

            elif include_provisional == "only provisional":
                spatial_query.filter(
                    Terms(field="geometries.provisional", terms=["true"]))

            search_query.filter(
                Nested(path="geometries", query=spatial_query))

        search_results_object["query"].add_query(search_query)

        if details["componentname"] not in search_results_object:
            search_results_object[details["componentname"]] = {}

        if feature_geom is None:
            logger.info(_("Feature geometry is not defined"))
        else:
            search_results_object[
                details["componentname"]]["search_buffer"] = feature_geom
Example #9
0
    def create_nested_attribute_filter(self, doc_id, nodegroup_id, value_list):
        """
        Build a nested query on the 'strings' documents of one nodegroup.

        The query is filtered to ``nodegroup_id`` and carries one phrase-match
        "should" clause per entry of ``value_list``.

        Arguments:
        doc_id -- not used by this implementation
        nodegroup_id -- the nodegroup whose strings are searched
        value_list -- the phrases to match against strings.string

        Returns the Nested query object.
        """

        string_filter = Bool()
        string_filter.filter(Terms(field='strings.nodegroup_id', terms=[nodegroup_id]))

        phrase_clauses = [
            Match(field='strings.string', query=phrase, type='phrase')
            for phrase in value_list
        ]
        for clause in phrase_clauses:
            string_filter.should(clause)

        return Nested(path='strings', query=string_filter)
Example #10
0
def search_terms(request):
    """
    Return autocomplete suggestions for the text typed into the search box.

    Reads ``q`` (the search text) and ``lang`` (falls back to
    settings.LANGUAGE_CODE) from the request's GET parameters and runs a
    prefix/fuzzy query against the 'strings' index. Provisional values are
    filtered out unless the requesting user belongs to the 'Resource Reviewer'
    group.

    Returns a JSONResponse wrapping a list of suggestion dicts with the keys
    type, context, context_label, id, text and value. The list is empty when
    the search yields no result or no aggregations.
    """
    lang = request.GET.get('lang', settings.LANGUAGE_CODE)
    se = SearchEngineFactory().create()
    searchString = request.GET.get('q', '').lower()
    query = Query(se, start=0, limit=0)
    user_is_reviewer = request.user.groups.filter(name='Resource Reviewer').exists()

    boolquery = Bool()
    boolquery.should(Match(field='value', query=searchString, type='phrase_prefix', fuzziness='AUTO'))
    boolquery.should(Match(field='value.folded', query=searchString, type='phrase_prefix', fuzziness='AUTO'))
    boolquery.should(Match(field='value.folded', query=searchString, fuzziness='AUTO'))

    # Only reviewers may see provisional values.
    if user_is_reviewer is False:
        boolquery.filter(Terms(field='provisional', terms=['false']))

    query.add_query(boolquery)

    # Bucket the hits by raw value (best score first); inside each value bucket
    # collect the top concepts (with their concept ids) and the nodegroup ids.
    base_agg = Aggregation(name='value_agg', type='terms', field='value.raw', size=settings.SEARCH_DROPDOWN_LENGTH, order={"max_score": "desc"})
    nodegroupid_agg = Aggregation(name='nodegroupid', type='terms', field='nodegroupid')
    top_concept_agg = Aggregation(name='top_concept', type='terms', field='top_concept')
    conceptid_agg = Aggregation(name='conceptid', type='terms', field='conceptid')
    max_score_agg = MaxAgg(name='max_score', script='_score')

    top_concept_agg.add_aggregation(conceptid_agg)
    base_agg.add_aggregation(max_score_agg)
    base_agg.add_aggregation(top_concept_agg)
    base_agg.add_aggregation(nodegroupid_agg)
    query.add_aggregation(base_agg)

    # The old fallback ({'hits': {'hits': []}}) had no 'aggregations' key, so an
    # empty search result crashed with KeyError just below. Fall back to "no
    # buckets" instead.
    results = query.search(index='strings') or {}
    buckets = results.get('aggregations', {}).get('value_agg', {}).get('buckets', [])

    i = 0
    ret = []
    for result in buckets:
        if len(result['top_concept']['buckets']) > 0:
            for top_concept in result['top_concept']['buckets']:
                top_concept_id = top_concept['key']
                top_concept_label = get_preflabel_from_conceptid(top_concept['key'], lang)['value']
                for concept in top_concept['conceptid']['buckets']:
                    ret.append({
                        'type': 'concept',
                        'context': top_concept_id,
                        'context_label': top_concept_label,
                        'id': i,
                        'text': result['key'],
                        'value': concept['key']
                    })
                # the id advances once per top concept, so every concept listed
                # under the same top concept shares one id
                i = i + 1
        else:
            ret.append({
                'type': 'term',
                'context': '',
                'context_label': get_resource_model_label(result),
                'id': i,
                'text': result['key'],
                'value': result['key']
            })
            i = i + 1

    return JSONResponse(ret)
Example #11
0
    def delete(self, user={}, note=""):
        """
        Deletes a single resource and any related indexed data

        Deletion is permitted when no user is given (user == {}), when the user
        is a resource reviewer, or when none of the resource's tiles hold data.

        Keyword Arguments:
        user -- the user requesting the deletion; {} skips the permission check
        note -- not used by this implementation (the edit log note is the
                resource's displayname)

        Returns True when the resource was deleted, False when deletion was not
        permitted. Raises ModelInactiveError when the resource's graph is not
        active.

        NOTE: uses a search engine instance named ``se`` that is not created in
        this method.
        """
        import logging  # local import: the file's import block is not visible from here

        graph = models.GraphModel.objects.get(graphid=self.graph_id)
        if graph.isactive is False:
            message = _("This model is not yet active; unable to delete.")
            raise ModelInactiveError(message)

        permit_deletion = True
        if user != {} and user_is_resource_reviewer(user) is False:
            # non-reviewers may only delete resources whose tiles carry no data
            tiles = models.TileModel.objects.filter(resourceinstance=self)
            permit_deletion = sum(len(t.data) for t in tiles) == 0

        if permit_deletion is True:
            related_resources = self.get_related_resources(lang="en-US", start=0, limit=1000, page=0)
            for rr in related_resources["resource_relationships"]:
                # delete any related resource entries, also reindex the resource that references this resource that's being deleted
                try:
                    resourceXresource = models.ResourceXResource.objects.get(pk=rr["resourcexid"])
                    resource_to_reindex = (
                        resourceXresource.resourceinstanceidfrom_id
                        if resourceXresource.resourceinstanceidto_id == self.resourceinstanceid
                        else resourceXresource.resourceinstanceidto_id
                    )
                    resourceXresource.delete(deletedResourceId=self.resourceinstanceid)
                    res = Resource.objects.get(pk=resource_to_reindex)
                    res.load_tiles()
                    res.index()
                except ObjectDoesNotExist:
                    # the row is already gone; drop the index entry that still points at it
                    se.delete(index=RESOURCE_RELATIONS_INDEX, id=rr["resourcexid"])

            # remove every indexed term that belongs to this resource
            query = Query(se)
            bool_query = Bool()
            bool_query.filter(Terms(field="resourceinstanceid", terms=[self.resourceinstanceid]))
            query.add_query(bool_query)
            results = query.search(index=TERMS_INDEX)["hits"]["hits"]
            for result in results:
                se.delete(index=TERMS_INDEX, id=result["_id"])
            se.delete(index=RESOURCES_INDEX, id=self.resourceinstanceid)

            try:
                self.save_edit(edit_type="delete", user=user, note=self.displayname)
            except Exception:
                # Writing the edit log stays best effort (it must not block the
                # deletion), but the failure is logged rather than swallowed,
                # and KeyboardInterrupt/SystemExit are no longer caught.
                logging.getLogger(__name__).exception(
                    "Unable to record the delete edit for resource %s", self.resourceinstanceid
                )
            super(Resource, self).delete()

        return permit_deletion
Example #12
0
    def append_dsl(self, search_results_object, permitted_nodegroups,
                   include_provisional):
        """
        Add the spatial (map) filter of the current request to the search query.

        The filter is read from the GET parameter named by
        details['componentname']. Only the first feature of the submitted
        feature collection is used: it is buffered according to its 'buffer'
        property (default width 0 ft) and turned into a geo-shape clause on the
        nested 'geometries' documents. An 'inverted' property that is True
        excludes the shape instead of requiring it.

        Arguments:
        search_results_object -- dict holding the query under 'query'; when a
            feature was submitted the buffer's geojson is stored under
            [details['componentname']]['search_buffer']
        permitted_nodegroups -- nodegroup ids the user may search
        include_provisional -- False (exclude provisional geometries),
            'only provisional' (require them); any other value applies no
            provisional filter
        """
        search_query = Bool()
        querysting_params = self.request.GET.get(details['componentname'], '')
        spatial_filter = JSONDeserializer().deserialize(querysting_params)

        # Stays None when the request carries no feature. Without this the last
        # statement of the method raised UnboundLocalError for such requests.
        search_buffer = None

        if 'features' in spatial_filter:
            if len(spatial_filter['features']) > 0:
                feature_geom = spatial_filter['features'][0]['geometry']
                feature_properties = {}
                if 'properties' in spatial_filter['features'][0]:
                    feature_properties = spatial_filter['features'][0][
                        'properties']
                buffer = {'width': 0, 'unit': 'ft'}
                if 'buffer' in feature_properties:
                    buffer = feature_properties['buffer']
                search_buffer = _buffer(feature_geom, buffer['width'],
                                        buffer['unit'])
                # from here on feature_geom is the buffered geometry
                feature_geom = JSONDeserializer().deserialize(
                    search_buffer.json)
                geoshape = GeoShape(field='geometries.geom.features.geometry',
                                    type=feature_geom['type'],
                                    coordinates=feature_geom['coordinates'])

                invert_spatial_search = False
                if 'inverted' in feature_properties:
                    invert_spatial_search = feature_properties['inverted']

                spatial_query = Bool()
                if invert_spatial_search is True:
                    spatial_query.must_not(geoshape)
                else:
                    spatial_query.filter(geoshape)

                # get the nodegroup_ids that the user has permission to search
                spatial_query.filter(
                    Terms(field='geometries.nodegroup_id',
                          terms=permitted_nodegroups))

                if include_provisional is False:
                    spatial_query.filter(
                        Terms(field='geometries.provisional', terms=['false']))

                elif include_provisional == 'only provisional':
                    spatial_query.filter(
                        Terms(field='geometries.provisional', terms=['true']))

                search_query.filter(
                    Nested(path='geometries', query=spatial_query))

        search_results_object['query'].add_query(search_query)

        if details['componentname'] not in search_results_object:
            search_results_object[details['componentname']] = {}
        # only report a buffer when one was actually computed
        if search_buffer is not None:
            search_results_object[
                details['componentname']]['search_buffer'] = search_buffer.geojson
Example #13
0
    def append_null_search_filters(self, value, node, query, request):
        """
        Add a clause to ``query`` that finds resources by whether the given
        node has been populated.

        The clause is always restricted to the node's graph. With
        value["op"] == "null" it matches resources that either have no tile
        data for the node, or have a tile where the node's value is null or an
        empty list. With value["op"] == "not_null" it requires tile data for
        the node to exist. Any other operator adds only the graph restriction.

        Arguments:
        value -- dict whose "op" entry selects the operator
        node -- the node being searched; its pk and graph_id are used
        query -- the Bool query that receives the clause via must()
        request -- not used by this implementation
        """
        graph_query = Bool()
        graph_query.filter(Terms(field="graph_id", terms=[str(node.graph_id)]))

        # matches resources that hold some tile data for this node
        populated_query = Bool()
        populated_query.must(Nested(path="tiles", query=Exists(field="tiles.data.%s" % (str(node.pk)))))

        operator = value["op"]
        if operator == "null":
            # search for tiles that don't exist
            unpopulated_query = Bool()
            unpopulated_query.must_not(populated_query)
            graph_query.should(unpopulated_query)

            # search for tiles that do exist, but that have null or [] as values
            # (the script returns 1 for such documents, and min_score 1 drops the rest)
            null_value_script = """
                                    int null_docs = 0;
                                    for(tile in params._source.tiles){
                                        if(tile.data.containsKey(params.node_id)){
                                            def val = tile.data.get(params.node_id);
                                            if (val == null || (val instanceof List && val.length==0)) {
                                                null_docs++;
                                                break;
                                            }
                                        }
                                    }
                                    return null_docs;
                                """
            script_score = {
                "script_score": {
                    "script": {
                        "source": null_value_script,
                        "lang": "painless",
                        "params": {"node_id": "%s" % (str(node.pk))},
                    }
                }
            }
            empty_value_query = Dsl()
            empty_value_query.dsl = {
                "function_score": {
                    "min_score": 1,
                    "query": {"match_all": {}},
                    "functions": [script_score],
                    "score_mode": "max",
                    "boost": 1,
                    "boost_mode": "replace",
                }
            }
            graph_query.should(empty_value_query)
        elif operator == "not_null":
            graph_query.must(populated_query)

        query.must(graph_query)
Example #14
0
    def delete_index(self, resourceinstanceid=None):
        """
        Remove every reference to a resource from the search indexes.

        In order: deletes the resource's terms, deletes the relation documents
        that point to or from it, re-indexes the resources whose index document
        lists it under "ids", deletes its own resource document and finally
        asks each custom index to delete it.

        Keyword Arguments:
        resourceinstanceid -- the resource instance id to delete from related indexes, if supplied will use this over self.resourceinstanceid

        NOTE: uses a search engine instance named ``se`` that is not created in
        this method. The custom indexes are always called with ``self``,
        whatever id was supplied.
        """

        if resourceinstanceid is None:
            target_id = str(self.resourceinstanceid)
        else:
            target_id = str(resourceinstanceid)

        # delete any related terms
        terms_query = Query(se)
        terms_filter = Bool()
        terms_filter.filter(Terms(field="resourceinstanceid", terms=[target_id]))
        terms_query.add_query(terms_filter)
        terms_query.delete(index=TERMS_INDEX)

        # delete any related resource index entries
        relations_query = Query(se)
        either_end = Bool()
        either_end.should(Terms(field="resourceinstanceidto", terms=[target_id]))
        either_end.should(Terms(field="resourceinstanceidfrom", terms=[target_id]))
        relations_query.add_query(either_end)
        relations_query.delete(index=RESOURCE_RELATIONS_INDEX)

        # reindex any related resources
        referrers_query = Query(se)
        referrers_filter = Bool()
        id_match = Terms(field="ids.id", terms=[target_id])
        referrers_filter.filter(Nested(path="ids", query=id_match))
        referrers_query.add_query(referrers_filter)
        referrers = referrers_query.search(index=RESOURCES_INDEX)["hits"]["hits"]
        for hit in referrers:
            try:
                referrer = Resource.objects.get(pk=hit["_id"])
                referrer.load_tiles()
                referrer.index()
            except ObjectDoesNotExist:
                # indexed but no longer in the database: nothing to reindex
                pass

        # delete resource index
        se.delete(index=RESOURCES_INDEX, id=target_id)

        # delete resources from custom indexes
        for custom_index in settings.ELASTICSEARCH_CUSTOM_INDEXES:
            index_class = import_class_from_string(custom_index["module"])
            index_class(custom_index["name"]).delete_resources(resources=self)
Example #15
0
    def delete(self, user={}, note=''):
        """
        Remove this resource together with its relationship records and its
        search index documents, provided the caller is allowed to.

        Deletion is permitted when no user is given (user == {}), when the user
        belongs to the 'Resource Reviewer' group, or when none of the
        resource's tiles hold data.

        Keyword Arguments:
        user -- the user requesting the deletion; {} skips the permission check
        note -- not used by this implementation (the edit log note is the
                resource's displayname)

        Returns True when the resource was deleted, False when deletion was not
        permitted. Raises ModelInactiveError when the resource's graph is not
        active.
        """

        graph = models.GraphModel.objects.get(graphid=self.graph_id)
        if graph.isactive is False:
            raise ModelInactiveError(
                _('This model is not yet active; unable to delete.'))

        permit_deletion = True
        if user != {}:
            is_reviewer = user.groups.filter(name='Resource Reviewer').exists()
            if is_reviewer is False:
                # non-reviewers may only delete resources whose tiles carry no data
                tile_rows = models.TileModel.objects.filter(resourceinstance=self)
                permit_deletion = sum(len(row.data) for row in tile_rows) == 0

        if permit_deletion is not True:
            return permit_deletion

        se = SearchEngineFactory().create()

        relationships = self.get_related_resources(
            lang="en-US", start=0, limit=1000, page=0)
        for relationship in relationships['resource_relationships']:
            relation = models.ResourceXResource.objects.get(
                pk=relationship['resourcexid'])
            relation.delete()

        # remove every indexed term that belongs to this resource
        term_query = Query(se)
        resource_filter = Bool()
        resource_filter.filter(
            Terms(field='resourceinstanceid', terms=[self.resourceinstanceid]))
        term_query.add_query(resource_filter)
        for hit in term_query.search(index='terms')['hits']['hits']:
            se.delete(index='terms', id=hit['_id'])
        se.delete(index='resources', id=self.resourceinstanceid)

        self.save_edit(edit_type='delete', user=user, note=self.displayname)
        super(Resource, self).delete()

        return permit_deletion
Example #16
0
    def append_dsl(self, search_results_object, permitted_nodegroups,
                   include_provisional):
        """
        Add the map aggregations (geohash grid and bounds) to the search query.

        The aggregations run on the nested 'points' documents, restricted to
        the permitted nodegroups and to the requested provisional state.

        Arguments:
        search_results_object -- dict holding the query under 'query'
        permitted_nodegroups -- nodegroup ids the user may search
        include_provisional -- True (provisional and authoritative points),
            False (authoritative points only) or 'only provisional'; any other
            value applies no provisional filter
        """
        nested_agg = NestedAgg(path='points', name='geo_aggs')
        nested_agg_filter = FiltersAgg(name='inner')
        geo_agg_filter = Bool()

        if include_provisional is True:
            geo_agg_filter.filter(
                Terms(field='points.provisional', terms=['false', 'true']))

        elif include_provisional is False:
            geo_agg_filter.filter(
                Terms(field='points.provisional', terms=['false']))

        # Compare the string by value. The previous identity test
        # (is 'only provisional') only matched when the caller happened to pass
        # the very same string object.
        elif include_provisional == 'only provisional':
            geo_agg_filter.filter(
                Terms(field='points.provisional', terms=['true']))

        geo_agg_filter.filter(
            Terms(field='points.nodegroup_id', terms=permitted_nodegroups))
        nested_agg_filter.add_filter(geo_agg_filter)
        nested_agg_filter.add_aggregation(
            GeoHashGridAgg(field='points.point',
                           name='grid',
                           precision=settings.HEX_BIN_PRECISION))
        nested_agg_filter.add_aggregation(
            GeoBoundsAgg(field='points.point', name='bounds'))
        nested_agg.add_aggregation(nested_agg_filter)
        search_results_object['query'].add_aggregation(nested_agg)
Example #17
0
    def create_nested_geo_filter(self, geometry):
        """
        Build a nested geo-shape filter on the 'geometries' documents.

        Arguments:
        geometry -- object exposing a ``geojson`` attribute (per the original
                    comment, a GEOS geometry); its type and coordinates define
                    the shape

        Returns the Nested query object.
        """

        ## process GEOS geometry object into geojson and create ES filter
        shape = JSONDeserializer().deserialize(geometry.geojson)

        spatial_filter = Bool()
        spatial_filter.filter(
            GeoShape(
                field="geometries.geom.features.geometry",
                type=shape["type"],
                coordinates=shape["coordinates"]
            )
        )
        return Nested(path='geometries', query=spatial_filter)
Example #18
0
 def gen_range_agg(gte=None, lte=None, permitted_nodegroups=None):
     """
     Build a query matching resources with a date or a date range that
     intersects the interval [gte, lte].

     Keyword Arguments:
     gte -- lower bound handed to the range clauses
     lte -- upper bound handed to the range clauses
     permitted_nodegroups -- nodegroup ids to restrict both clauses to; None
                             applies no restriction

     Returns a Bool query with two "should" clauses: the nested date_ranges
     clause first, then the nested dates clause.
     """
     restrict_nodegroups = permitted_nodegroups is not None

     # single dates
     dates_clause = Bool()
     dates_clause.filter(
         Range(field="dates.date", gte=gte, lte=lte, relation="intersects"))
     if restrict_nodegroups:
         dates_clause.filter(
             Terms(field="dates.nodegroup_id", terms=permitted_nodegroups))

     # date ranges
     ranges_clause = Bool()
     ranges_clause.filter(
         Range(field="date_ranges.date_range", gte=gte, lte=lte, relation="intersects"))
     if restrict_nodegroups:
         ranges_clause.filter(
             Terms(field="date_ranges.nodegroup_id", terms=permitted_nodegroups))

     either = Bool()
     either.should(Nested(path="date_ranges", query=ranges_clause))
     either.should(Nested(path="dates", query=dates_clause))
     return either
Example #19
0
 def gen_range_agg(gte=None, lte=None, permitted_nodegroups=None):
     """
     Build a query matching resources with a date or a date range that
     intersects the interval [gte, lte].

     Keyword Arguments:
     gte -- lower bound handed to the range clauses
     lte -- upper bound handed to the range clauses
     permitted_nodegroups -- nodegroup ids to restrict both clauses to; None
                             (the default) applies no restriction

     Returns a Bool query with two 'should' clauses: the nested date_ranges
     clause first, then the nested dates clause.
     """
     # Test against None rather than truthiness: an empty list means "no
     # nodegroup is permitted" and must still be applied as a filter. The
     # truthiness test silently dropped the restriction in that case.
     restrict_nodegroups = permitted_nodegroups is not None

     date_query = Bool()
     date_query.filter(
         Range(field='dates.date',
               gte=gte,
               lte=lte,
               relation='intersects'))
     if restrict_nodegroups:
         date_query.filter(
             Terms(field='dates.nodegroup_id',
                   terms=permitted_nodegroups))
     date_ranges_query = Bool()
     date_ranges_query.filter(
         Range(field='date_ranges.date_range',
               gte=gte,
               lte=lte,
               relation='intersects'))
     if restrict_nodegroups:
         date_ranges_query.filter(
             Terms(field='date_ranges.nodegroup_id',
                   terms=permitted_nodegroups))
     wrapper_query = Bool()
     wrapper_query.should(
         Nested(path='date_ranges', query=date_ranges_query))
     wrapper_query.should(Nested(path='dates', query=date_query))
     return wrapper_query
Example #20
0
    def append_dsl(self, search_results_object, permitted_nodegroups,
                   include_provisional):
        """
        Adds a nested geo aggregation over "points" to the search query

        Arguments:
        search_results_object -- dict whose "query" entry receives the aggregation
        permitted_nodegroups -- ids used to filter on "points.nodegroup_id"
        include_provisional -- True, False or the string "only provisional";
            any other value adds no "points.provisional" filter
        """
        nested_agg = NestedAgg(path="points", name="geo_aggs")
        nested_agg_filter = FiltersAgg(name="inner")
        geo_agg_filter = Bool()

        if include_provisional is True:
            provisional_terms = ["false", "true"]
        elif include_provisional is False:
            provisional_terms = ["false"]
        elif include_provisional == "only provisional":
            # Compared by value: "is" against a string literal tests object
            # identity, which equal strings are not guaranteed to share.
            provisional_terms = ["true"]
        else:
            provisional_terms = None

        if provisional_terms is not None:
            geo_agg_filter.filter(
                Terms(field="points.provisional", terms=provisional_terms))

        geo_agg_filter.filter(
            Terms(field="points.nodegroup_id", terms=permitted_nodegroups))
        nested_agg_filter.add_filter(geo_agg_filter)
        nested_agg_filter.add_aggregation(
            GeoHashGridAgg(field="points.point",
                           name="grid",
                           precision=settings.HEX_BIN_PRECISION))
        nested_agg_filter.add_aggregation(
            GeoBoundsAgg(field="points.point", name="bounds"))
        nested_agg.add_aggregation(nested_agg_filter)
        search_results_object["query"].add_aggregation(nested_agg)
Example #21
0
 def append_null_search_filters(self, value, node, query, request):
     """
     Appends the search query dsl to search for fields that haven't been populated

     Arguments:
     value -- dict whose "op" entry is "null" or "not_null"; any other op adds
         only the graph_id filter
     node -- supplies the pk used in the tile data field name and the graph_id
     query -- receives the assembled clause through must()
     request -- not used here
     """
     # Matches when the node's key exists in some tile's data.
     populated = Bool()
     populated.must(
         Nested(path="tiles",
                query=Exists(field="tiles.data.%s" % (str(node.pk)))))

     graph_scoped = Bool()
     graph_scoped.filter(Terms(field="graph_id", terms=[str(node.graph_id)]))

     op = value["op"]
     if op == "null":
         graph_scoped.must_not(populated)
     elif op == "not_null":
         graph_scoped.must(populated)

     query.must(graph_scoped)
Example #22
0
    def delete(self, *args, **kwargs):
        """
        Deletes this tile and its child tiles, or records a provisional delete

        When there is no request user, the user is a resource reviewer, or the
        user owns the provisional tile, the tile is deleted and an edit is
        saved. Otherwise the delete is stored as a provisional edit and the
        tile is saved instead.

        Keyword Arguments:
        request -- request whose user decides how the delete is applied (default None)
        index -- whether indexed terms are removed and the tile reindexed (default True)
        transaction_id -- id recorded with the saved edit (default None)
        provisional_edit_log_details -- passed through to save_edit (default None)
        """
        se = SearchEngineFactory().create()
        request = kwargs.pop("request", None)
        index = kwargs.pop("index", True)
        # Must pop its own key. Popping "index" a second time always yielded
        # None and left "transaction_id" in kwargs, to be forwarded to the
        # child tile and model delete calls below.
        transaction_id = kwargs.pop("transaction_id", None)
        provisional_edit_log_details = kwargs.pop(
            "provisional_edit_log_details", None)
        for tile in self.tiles:
            tile.delete(*args, request=request, **kwargs)
        try:
            user = request.user
            user_is_reviewer = user_is_resource_reviewer(user)
        except AttributeError:  # no user
            user = None
            user_is_reviewer = True

        if user_is_reviewer is True or self.user_owns_provisional(user):
            if index:
                # Remove the terms indexed for this tile.
                query = Query(se)
                bool_query = Bool()
                bool_query.filter(Terms(field="tileid", terms=[self.tileid]))
                query.add_query(bool_query)
                query.delete(index=TERMS_INDEX)

            self.__preDelete(request)
            self.save_edit(
                user=user,
                edit_type="tile delete",
                old_value=self.data,
                provisional_edit_log_details=provisional_edit_log_details,
                transaction_id=transaction_id,
            )
            try:
                super(Tile, self).delete(*args, **kwargs)
                # Give each node's datatype a chance to clean up after the delete.
                for nodeid in self.data:
                    node = models.Node.objects.get(nodeid=nodeid)
                    datatype = self.datatype_factory.get_instance(
                        node.datatype)
                    datatype.post_tile_delete(self, nodeid, index=index)
                if index:
                    self.index()
            except IntegrityError as e:
                logger.error(e)

        else:
            # Non-reviewers record the delete as a provisional edit instead.
            self.apply_provisional_edit(user, data={}, action="delete")
            super(Tile, self).save(*args, **kwargs)
Example #23
0
    def append_dsl(self, search_results_object, permitted_nodegroups,
                   include_provisional):
        """
        Restricts the search query to (or away from) the requested graph ids

        The filters are read from the request parameter named by
        details["componentname"]. Each filter contributes its "graphid"; the
        "inverted" flag of the last filter decides whether the ids are excluded
        (must_not) or required (filter). An empty filter list adds nothing.

        Arguments:
        search_results_object -- dict whose "query" entry receives the clause
        permitted_nodegroups -- not used here
        include_provisional -- not used here
        """
        search_query = Bool()
        querystring_params = self.request.GET.get(details["componentname"], "")

        graph_ids = []
        last_filter = None
        for resource_type_filter in JSONDeserializer().deserialize(
                querystring_params):
            graph_ids.append(str(resource_type_filter["graphid"]))
            last_filter = resource_type_filter

        # With no filters there is nothing to constrain. The loop variable was
        # previously read after the loop, which raised NameError in this case.
        if last_filter is None:
            return

        terms = Terms(field="graph_id", terms=graph_ids)
        if last_filter["inverted"] is True:
            search_query.must_not(terms)
        else:
            search_query.filter(terms)

        search_results_object["query"].add_query(search_query)
Example #24
0
    def append_dsl(self, search_results_object, permitted_nodegroups,
                   include_provisional):
        """
        Adds a 'provisional_resource' filter to the search query

        Nothing is added when include_provisional is True. For False the query
        is limited to 'false'/'partial', for 'only provisional' to
        'true'/'partial'. Any other value adds an empty Bool clause.

        Arguments:
        search_results_object -- dict whose 'query' entry receives the clause
        permitted_nodegroups -- not used here
        include_provisional -- True, False or the string 'only provisional'
        """
        search_query = Bool()
        if include_provisional is not True:
            provisional_resource_filter = Bool()

            if include_provisional is False:
                provisional_resource_filter.filter(
                    Terms(field='provisional_resource',
                          terms=['false', 'partial']))

            # Compared by value: "is" against a string literal tests object
            # identity, which equal strings are not guaranteed to share.
            elif include_provisional == 'only provisional':
                provisional_resource_filter.filter(
                    Terms(field='provisional_resource',
                          terms=['true', 'partial']))

            search_query.must(provisional_resource_filter)
            search_results_object['query'].add_query(search_query)
Example #25
0
    def append_dsl(self, search_results_object, permitted_nodegroups,
                   include_provisional):
        """
        Adds a "provisional_resource" filter to the search query

        Nothing is added when include_provisional is True. For False the query
        is limited to "false"/"partial", for "only provisional" to
        "true"/"partial". Any other value adds an empty Bool clause.

        Arguments:
        search_results_object -- dict whose "query" entry receives the clause
        permitted_nodegroups -- not used here
        include_provisional -- True, False or the string "only provisional"
        """
        wrapper = Bool()

        # Everything is included: no restriction needed.
        if include_provisional is True:
            return

        if include_provisional is False:
            allowed_states = ["false", "partial"]
        elif include_provisional == "only provisional":
            allowed_states = ["true", "partial"]
        else:
            allowed_states = None

        provisional_clause = Bool()
        if allowed_states is not None:
            provisional_clause.filter(
                Terms(field="provisional_resource", terms=allowed_states))

        wrapper.must(provisional_clause)
        search_results_object["query"].add_query(wrapper)
Example #26
0
    def delete(self, *args, **kwargs):
        """
        Deletes this tile, its child tiles and its indexed terms, then reindexes the resource

        Keyword Arguments:
        request -- forwarded to the child tile deletes and the pre-delete hook (default None)
        """
        search_engine = SearchEngineFactory().create()
        request = kwargs.pop('request', None)

        # Child tiles go first; each value of self.tiles is iterated as a
        # group of tiles.
        for child_group in self.tiles.itervalues():
            for child in child_group:
                child.delete(*args, request=request, **kwargs)

        # Look up the terms indexed for this tile and remove them one by one.
        tile_filter = Bool()
        tile_filter.filter(Terms(field='tileid', terms=[self.tileid]))
        term_query = Query(search_engine)
        term_query.add_query(tile_filter)
        term_hits = term_query.search(index='strings', doc_type='term')['hits']['hits']
        for hit in term_hits:
            search_engine.delete(index='strings', doc_type='term', id=hit['_id'])

        self.__preDelete(request)
        super(Tile, self).delete(*args, **kwargs)

        # Refresh the owning resource's index entry.
        owner_id = self.resourceinstance.resourceinstanceid
        Resource.objects.get(resourceinstanceid=owner_id).index()
Example #27
0
    def delete(self):
        """
        Deletes a single resource and any related indexed data

        Relationship records returned by get_related_resources (requested with
        start=0, limit=15) are deleted first, then the indexed terms, the
        resource's own index document and finally the database record.
        """
        search_engine = SearchEngineFactory().create()

        related = self.get_related_resources(lang="en-US", start=0, limit=15)
        for relationship in related['resource_relationships']:
            models.ResourceXResource.objects.get(pk=relationship['resourcexid']).delete()

        # Remove every term indexed for this resource.
        resource_filter = Bool()
        resource_filter.filter(Terms(field='resourceinstanceid', terms=[self.resourceinstanceid]))
        term_query = Query(search_engine)
        term_query.add_query(resource_filter)
        term_hits = term_query.search(index='strings', doc_type='term')['hits']['hits']
        for hit in term_hits:
            search_engine.delete(index='strings', doc_type='term', id=hit['_id'])

        # Remove the resource document itself, then the model row.
        search_engine.delete(index='resource', doc_type=str(self.graph_id), id=self.resourceinstanceid)
        super(Resource, self).delete()
Example #28
0
    def delete(self, *args, **kwargs):
        """
        Deletes this tile, its child tiles and its indexed terms, logs the edit and reindexes the resource

        Keyword Arguments:
        request -- forwarded to the child tile deletes and the pre-delete hook;
            request.user is read unconditionally for the edit log, so a
            request has to be supplied (default None)
        """
        search_engine = SearchEngineFactory().create()
        request = kwargs.pop('request', None)

        # Child tiles go first; each value of self.tiles is iterated as a
        # group of tiles.
        for child_group in self.tiles.itervalues():
            for child in child_group:
                child.delete(*args, request=request, **kwargs)

        # Look up the terms indexed for this tile and remove them one by one.
        tile_filter = Bool()
        tile_filter.filter(Terms(field='tileid', terms=[self.tileid]))
        term_query = Query(search_engine)
        term_query.add_query(tile_filter)
        term_hits = term_query.search(index='strings', doc_type='term')['hits']['hits']
        for hit in term_hits:
            search_engine.delete(index='strings', doc_type='term', id=hit['_id'])

        self.__preDelete(request)
        self.save_edit(user=request.user, edit_type='tile delete', old_value=self.data)
        super(Tile, self).delete(*args, **kwargs)

        # Refresh the owning resource's index entry.
        owner_id = self.resourceinstance.resourceinstanceid
        Resource.objects.get(resourceinstanceid=owner_id).index()
Example #29
0
 def gen_range_agg(gte=None, lte=None, permitted_nodegroups=None):
     """
     Builds a Bool query over the nested 'dates' and 'date_ranges' paths

     Keyword Arguments:
     gte -- lower bound handed to both Range clauses
     lte -- upper bound handed to both Range clauses
     permitted_nodegroups -- when truthy, each nested clause is also filtered
         on its nodegroup_id field with these ids (None or empty adds nothing)

     Returns a Bool with two "should" clauses: a Nested query on 'date_ranges'
     followed by a Nested query on 'dates'.
     """
     # (nested path, range field, nodegroup field) for each clause to build.
     clause_specs = (
         ('dates', 'dates.date', 'dates.nodegroup_id'),
         ('date_ranges', 'date_ranges.date_range', 'date_ranges.nodegroup_id'),
     )

     clauses = {}
     for nested_path, range_field, nodegroup_field in clause_specs:
         clause = Bool()
         clause.filter(Range(field=range_field, gte=gte, lte=lte, relation='intersects'))
         if permitted_nodegroups:
             clause.filter(Terms(field=nodegroup_field, terms=permitted_nodegroups))
         clauses[nested_path] = clause

     # date_ranges is registered before dates.
     wrapper = Bool()
     wrapper.should(Nested(path='date_ranges', query=clauses['date_ranges']))
     wrapper.should(Nested(path='dates', query=clauses['dates']))
     return wrapper
Example #30
0
    def delete_index(self, resourceinstanceid=None):
        """
        Deletes all references to a resource from all indexes

        Runs in order: delete the resource's terms, delete relation entries
        pointing to or from it, reindex the resources found through the nested
        "ids" query, then delete the resource's own index document.

        Keyword Arguments:
        resourceinstanceid -- the resource instance id to delete from related indexes, if supplied will use this over self.resourceinstanceid
        """

        target_id = str(self.resourceinstanceid if resourceinstanceid is None else resourceinstanceid)

        # delete any related terms
        terms_query = Query(se)
        terms_filter = Bool()
        terms_filter.filter(Terms(field="resourceinstanceid", terms=[target_id]))
        terms_query.add_query(terms_filter)
        terms_query.delete(index=TERMS_INDEX)

        # delete any relation entries that point to or from the resource
        relations_query = Query(se)
        relations_filter = Bool()
        for relation_field in ("resourceinstanceidto", "resourceinstanceidfrom"):
            relations_filter.should(Terms(field=relation_field, terms=[target_id]))
        relations_query.add_query(relations_filter)
        relations_query.delete(index=RESOURCE_RELATIONS_INDEX)

        # reindex any related resources
        referencing_query = Query(se)
        referencing_filter = Bool()
        id_match = Terms(field="ids.id", terms=[target_id])
        referencing_filter.filter(Nested(path="ids", query=id_match))
        referencing_query.add_query(referencing_filter)
        for hit in referencing_query.search(index=RESOURCES_INDEX)["hits"]["hits"]:
            related_resource = Resource.objects.get(pk=hit["_id"])
            related_resource.load_tiles()
            related_resource.index()

        # delete resource index
        se.delete(index=RESOURCES_INDEX, id=target_id)
Example #31
0
    def append_search_filters(self, value, node, query, request):
        """
        Appends a phrase-match clause for this node's tile data to the query

        "null"/"not_null" ops are delegated to append_null_search_filters. For
        other ops a non-empty "val" is phrase-matched against the node's tile
        data, negated when the op contains "!". A KeyError raised anywhere
        along the way (e.g. a missing "op" or "val") is swallowed and nothing
        further is added.

        Arguments:
        value -- dict with "op" and "val" entries
        node -- supplies the pk used in the tile data field name and the graph_id
        query -- receives the assembled clause through must()
        request -- forwarded to append_null_search_filters
        """
        try:
            if value["op"] == "null" or value["op"] == "not_null":
                self.append_null_search_filters(value, node, query, request)
            elif value["val"] != "":
                graph_scoped = Bool()
                graph_scoped.filter(
                    Terms(field="graph_id", terms=[str(node.graph_id)]))

                phrase_match = Match(field="tiles.data.%s" % (str(node.pk)),
                                     type="phrase",
                                     query=value["val"])
                tile_match = Nested(path="tiles", query=phrase_match)

                # An op containing "!" excludes matching tiles.
                attach = (graph_scoped.must_not
                          if "!" in value["op"] else graph_scoped.must)
                attach(tile_match)
                query.must(graph_scoped)
        except KeyError:
            pass
Example #32
0
        def get_relations(resourceinstanceid,
                          start,
                          limit,
                          resourceinstance_graphid=None):
            """
            Searches the relations index for relations to or from a resource

            Arguments:
            resourceinstanceid -- matched against "resourceinstanceidfrom" and "resourceinstanceidto"
            start -- passed to Query as the result offset
            limit -- passed to Query as the result limit

            Keyword Arguments:
            resourceinstance_graphid -- when truthy, relations must link this graph id
                with str(self.graph_id), in either direction

            Returns the result of the search on RESOURCE_RELATIONS_INDEX.
            """
            relations_filter = Bool()
            for id_field in ("resourceinstanceidfrom", "resourceinstanceidto"):
                relations_filter.should(
                    Terms(field=id_field, terms=resourceinstanceid))

            if resourceinstance_graphid:
                either_direction = Bool()

                # This graph -> requested graph.
                outgoing = Bool()
                outgoing.filter(
                    Terms(field="resourceinstancefrom_graphid",
                          terms=str(self.graph_id)))
                outgoing.filter(
                    Terms(field="resourceinstanceto_graphid",
                          terms=resourceinstance_graphid))
                either_direction.should(outgoing)

                # Requested graph -> this graph.
                incoming = Bool()
                incoming.filter(
                    Terms(field="resourceinstancefrom_graphid",
                          terms=resourceinstance_graphid))
                incoming.filter(
                    Terms(field="resourceinstanceto_graphid",
                          terms=str(self.graph_id)))
                either_direction.should(incoming)

                relations_filter.must(either_direction)

            relations_query = Query(se, start=start, limit=limit)
            relations_query.add_query(relations_filter)
            return relations_query.search(index=RESOURCE_RELATIONS_INDEX)
Example #33
0
def search_terms(request):
    """
    Returns term and concept suggestions for the "q" request parameter

    The "terms" and "concepts" indexes are each queried with the lowercased
    search string. Values that have top concepts produce one "concept" entry
    per concept id; all other values produce a single "term" entry. Entry ids
    come from one counter shared across both indexes, and every concept under
    the same top concept shares an id.

    Arguments:
    request -- supplies the "lang" and "q" GET parameters and the user

    Returns a JSONResponse wrapping a dict keyed by index name.
    """
    lang = request.GET.get("lang", settings.LANGUAGE_CODE)
    se = SearchEngineFactory().create()
    needle = request.GET.get("q", "").lower()
    user_is_reviewer = user_is_resource_reviewer(request.user)

    next_id = 0
    suggestions = {}
    for index in ["terms", "concepts"]:
        query = Query(se, start=0, limit=0)

        matcher = Bool()
        matcher.should(
            Match(field="value", query=needle, type="phrase_prefix"))
        matcher.should(
            Match(field="value.folded", query=needle, type="phrase_prefix"))
        matcher.should(
            Match(field="value.folded",
                  query=needle,
                  fuzziness="AUTO",
                  prefix_length=settings.SEARCH_TERM_SENSITIVITY))

        # Non-reviewers never see provisional terms.
        if user_is_reviewer is False and index == "terms":
            matcher.filter(Terms(field="provisional", terms=["false"]))
        query.add_query(matcher)

        by_value = Aggregation(name="value_agg",
                               type="terms",
                               field="value.raw",
                               size=settings.SEARCH_DROPDOWN_LENGTH,
                               order={"max_score": "desc"})
        by_nodegroup = Aggregation(name="nodegroupid",
                                   type="terms",
                                   field="nodegroupid")
        by_top_concept = Aggregation(name="top_concept",
                                     type="terms",
                                     field="top_concept")
        by_concept = Aggregation(name="conceptid",
                                 type="terms",
                                 field="conceptid")
        best_score = MaxAgg(name="max_score", script="_score")

        by_top_concept.add_aggregation(by_concept)
        for sub_agg in (best_score, by_top_concept, by_nodegroup):
            by_value.add_aggregation(sub_agg)
        query.add_aggregation(by_value)

        entries = []
        suggestions[index] = entries
        results = query.search(index=index)
        if results is None:
            continue

        for value_bucket in results["aggregations"]["value_agg"]["buckets"]:
            top_concepts = value_bucket["top_concept"]["buckets"]
            if len(top_concepts) > 0:
                for top_concept in top_concepts:
                    context_id = top_concept["key"]
                    context_label = get_preflabel_from_conceptid(
                        top_concept["key"], lang)["value"]
                    entries.extend([{
                        "type": "concept",
                        "context": context_id,
                        "context_label": context_label,
                        "id": next_id,
                        "text": value_bucket["key"],
                        "value": concept["key"],
                    } for concept in top_concept["conceptid"]["buckets"]])
                    # The id advances once per top concept, not per concept.
                    next_id += 1
            else:
                entries.append({
                    "type": "term",
                    "context": "",
                    "context_label": get_resource_model_label(value_bucket),
                    "id": next_id,
                    "text": value_bucket["key"],
                    "value": value_bucket["key"],
                })
                next_id += 1

    return JSONResponse(suggestions)
Example #34
0
def build_search_results_dsl(request):
    """
    Builds the search Query described by the request's GET parameters

    Reads 'termFilter', 'mapFilter', 'temporalFilter', 'advanced', 'export',
    'mobiledownload' and 'page' from request.GET, and combines term, spatial,
    temporal and advanced filters into one Bool query. A nested geo
    aggregation over 'points' is always added.

    Arguments:
    request -- supplies the GET parameters and the user whose permitted
        nodegroups restrict the filters

    Returns a dict with 'query' (the assembled Query) and 'search_buffer'
    (the .geojson of the buffered spatial filter geometry, or None when no
    spatial feature was supplied).
    """
    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(
        request.GET.get('mapFilter', '{}'))
    include_provisional = get_provisional_type(request)

    export = request.GET.get('export', None)
    mobile_download = request.GET.get('mobiledownload', None)
    # An empty 'page' parameter is treated as page 1.
    page = 1 if request.GET.get('page') == '' else int(
        request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(
        request.GET.get('temporalFilter', '{}'))
    advanced_filters = JSONDeserializer().deserialize(
        request.GET.get('advanced', '[]'))
    search_buffer = None
    se = SearchEngineFactory().create()

    # Page size depends on the kind of request; export takes precedence over
    # mobile download.
    if export != None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    elif mobile_download != None:
        limit = settings.MOBILE_DOWNLOAD_RESOURCE_LIMIT
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit * int(page - 1), limit=limit)
    search_query = Bool()

    # --- geo aggregation and resource-level provisional filter ---
    nested_agg = NestedAgg(path='points', name='geo_aggs')
    nested_agg_filter = FiltersAgg(name='inner')

    if include_provisional == True:
        nested_agg_filter.add_filter(
            Terms(field='points.provisional', terms=['false', 'true']))

    else:
        provisional_resource_filter = Bool()

        if include_provisional == False:
            provisional_resource_filter.filter(
                Terms(field='provisional', terms=['false', 'partial']))
            nested_agg_filter.add_filter(
                Terms(field='points.provisional', terms=['false']))

        elif include_provisional == 'only provisional':
            provisional_resource_filter.filter(
                Terms(field='provisional', terms=['true', 'partial']))
            nested_agg_filter.add_filter(
                Terms(field='points.provisional', terms=['true']))

        # Added even when neither branch above matched (an empty Bool then).
        search_query.must(provisional_resource_filter)

    nested_agg_filter.add_aggregation(
        GeoHashGridAgg(field='points.point',
                       name='grid',
                       precision=settings.HEX_BIN_PRECISION))
    nested_agg_filter.add_aggregation(
        GeoBoundsAgg(field='points.point', name='bounds'))
    nested_agg.add_aggregation(nested_agg_filter)
    query.add_aggregation(nested_agg)

    permitted_nodegroups = get_permitted_nodegroups(request.user)

    # --- term filter: free-text strings, exact terms and concepts ---
    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            # NOTE(review): term_query and provisional_term_filter are never
            # used below.
            term_query = Bool()
            provisional_term_filter = Bool()
            if term['type'] == 'term' or term['type'] == 'string':
                string_filter = Bool()
                if term['type'] == 'term':
                    # Exact terms must match as a whole phrase.
                    string_filter.must(
                        Match(field='strings.string',
                              query=term['value'],
                              type='phrase'))
                elif term['type'] == 'string':
                    # Free text matches as a phrase prefix on either the raw
                    # or the folded field.
                    string_filter.should(
                        Match(field='strings.string',
                              query=term['value'],
                              type='phrase_prefix'))
                    string_filter.should(
                        Match(field='strings.string.folded',
                              query=term['value'],
                              type='phrase_prefix'))

                if include_provisional == False:
                    string_filter.must_not(
                        Match(field='strings.provisional',
                              query='true',
                              type='phrase'))
                elif include_provisional == 'only provisional':
                    string_filter.must_not(
                        Match(field='strings.provisional',
                              query='false',
                              type='phrase'))

                string_filter.filter(
                    Terms(field='strings.nodegroup_id',
                          terms=permitted_nodegroups))
                nested_string_filter = Nested(path='strings',
                                              query=string_filter)
                if term['inverted']:
                    search_query.must_not(nested_string_filter)
                else:
                    search_query.must(nested_string_filter)
                    # need to set min_score because the query returns results with score 0 and those have to be removed, which I don't think it should be doing
                    query.min_score('0.01')
            elif term['type'] == 'concept':
                # Concepts match on the ids returned by _get_child_concepts.
                concept_ids = _get_child_concepts(term['value'])
                conceptid_filter = Bool()
                conceptid_filter.filter(
                    Terms(field='domains.conceptid', terms=concept_ids))
                conceptid_filter.filter(
                    Terms(field='domains.nodegroup_id',
                          terms=permitted_nodegroups))

                if include_provisional == False:
                    conceptid_filter.must_not(
                        Match(field='domains.provisional',
                              query='true',
                              type='phrase'))
                elif include_provisional == 'only provisional':
                    conceptid_filter.must_not(
                        Match(field='domains.provisional',
                              query='false',
                              type='phrase'))

                nested_conceptid_filter = Nested(path='domains',
                                                 query=conceptid_filter)
                if term['inverted']:
                    search_query.must_not(nested_conceptid_filter)
                else:
                    search_query.filter(nested_conceptid_filter)

    # --- spatial filter: only the first feature is used ---
    if 'features' in spatial_filter:

        if len(spatial_filter['features']) > 0:
            feature_geom = spatial_filter['features'][0]['geometry']
            feature_properties = {}
            if 'properties' in spatial_filter['features'][0]:
                feature_properties = spatial_filter['features'][0][
                    'properties']
            # Default buffer when the feature does not carry one.
            buffer = {'width': 0, 'unit': 'ft'}
            if 'buffer' in feature_properties:
                buffer = feature_properties['buffer']
            search_buffer = _buffer(feature_geom, buffer['width'],
                                    buffer['unit'])
            # From here on the buffered geometry replaces the original one.
            feature_geom = JSONDeserializer().deserialize(search_buffer.json)
            geoshape = GeoShape(field='geometries.geom.features.geometry',
                                type=feature_geom['type'],
                                coordinates=feature_geom['coordinates'])

            invert_spatial_search = False
            if 'inverted' in feature_properties:
                invert_spatial_search = feature_properties['inverted']

            spatial_query = Bool()
            if invert_spatial_search == True:
                spatial_query.must_not(geoshape)
            else:
                spatial_query.filter(geoshape)

            # get the nodegroup_ids that the user has permission to search
            spatial_query.filter(
                Terms(field='geometries.nodegroup_id',
                      terms=permitted_nodegroups))

            if include_provisional == False:
                spatial_query.filter(
                    Terms(field='geometries.provisional', terms=['false']))

            elif include_provisional == 'only provisional':
                spatial_query.filter(
                    Terms(field='geometries.provisional', terms=['true']))

            search_query.filter(Nested(path='geometries', query=spatial_query))

    # --- temporal filter: applied only when both bounds are present ---
    if 'fromDate' in temporal_filter and 'toDate' in temporal_filter:
        # NOTE(review): now is never used below.
        now = str(datetime.utcnow())
        start_date = ExtendedDateFormat(temporal_filter['fromDate'])
        end_date = ExtendedDateFormat(temporal_filter['toDate'])
        # A missing or empty 'dateNodeId' means "search all date nodes".
        date_nodeid = str(
            temporal_filter['dateNodeId']
        ) if 'dateNodeId' in temporal_filter and temporal_filter[
            'dateNodeId'] != '' else None
        query_inverted = False if 'inverted' not in temporal_filter else temporal_filter[
            'inverted']

        temporal_query = Bool()

        if query_inverted:
            # inverted date searches need to use an OR clause and are generally more complicated to structure (can't use ES must_not)
            # eg: less than START_DATE OR greater than END_DATE
            inverted_date_query = Bool()
            inverted_date_ranges_query = Bool()

            if start_date.is_valid():
                inverted_date_query.should(
                    Range(field='dates.date', lt=start_date.lower))
                inverted_date_ranges_query.should(
                    Range(field='date_ranges.date_range', lt=start_date.lower))
            if end_date.is_valid():
                inverted_date_query.should(
                    Range(field='dates.date', gt=end_date.upper))
                inverted_date_ranges_query.should(
                    Range(field='date_ranges.date_range', gt=end_date.upper))

            date_query = Bool()
            date_query.filter(inverted_date_query)
            date_query.filter(
                Terms(field='dates.nodegroup_id', terms=permitted_nodegroups))

            if include_provisional == False:
                date_query.filter(
                    Terms(field='dates.provisional', terms=['false']))

            elif include_provisional == 'only provisional':
                date_query.filter(
                    Terms(field='dates.provisional', terms=['true']))

            if date_nodeid:
                # A specific date node was requested: date ranges are not
                # searched in this case.
                date_query.filter(Term(field='dates.nodeid', term=date_nodeid))
            else:
                date_ranges_query = Bool()
                date_ranges_query.filter(inverted_date_ranges_query)
                date_ranges_query.filter(
                    Terms(field='date_ranges.nodegroup_id',
                          terms=permitted_nodegroups))

                if include_provisional == False:
                    date_ranges_query.filter(
                        Terms(field='date_ranges.provisional',
                              terms=['false']))

                elif include_provisional == 'only provisional':
                    date_ranges_query.filter(
                        Terms(field='date_ranges.provisional', terms=['true']))

                temporal_query.should(
                    Nested(path='date_ranges', query=date_ranges_query))
            temporal_query.should(Nested(path='dates', query=date_query))

        else:
            date_query = Bool()
            date_query.filter(
                Range(field='dates.date',
                      gte=start_date.lower,
                      lte=end_date.upper))
            date_query.filter(
                Terms(field='dates.nodegroup_id', terms=permitted_nodegroups))

            if include_provisional == False:
                date_query.filter(
                    Terms(field='dates.provisional', terms=['false']))
            elif include_provisional == 'only provisional':
                date_query.filter(
                    Terms(field='dates.provisional', terms=['true']))

            if date_nodeid:
                # A specific date node was requested: date ranges are not
                # searched in this case.
                date_query.filter(Term(field='dates.nodeid', term=date_nodeid))
            else:
                date_ranges_query = Bool()
                date_ranges_query.filter(
                    Range(field='date_ranges.date_range',
                          gte=start_date.lower,
                          lte=end_date.upper,
                          relation='intersects'))
                date_ranges_query.filter(
                    Terms(field='date_ranges.nodegroup_id',
                          terms=permitted_nodegroups))

                if include_provisional == False:
                    date_ranges_query.filter(
                        Terms(field='date_ranges.provisional',
                              terms=['false']))
                if include_provisional == 'only provisional':
                    date_ranges_query.filter(
                        Terms(field='date_ranges.provisional', terms=['true']))

                temporal_query.should(
                    Nested(path='date_ranges', query=date_ranges_query))
            temporal_query.should(Nested(path='dates', query=date_query))

        search_query.filter(temporal_query)

    # --- advanced filters ---
    # Filters are collected into groups: a filter whose 'op' is 'or' (other
    # than the first filter) starts a new group. Filters inside a group are
    # combined with must(), and the groups themselves with should().
    datatype_factory = DataTypeFactory()
    if len(advanced_filters) > 0:
        advanced_query = Bool()
        grouped_query = Bool()
        grouped_queries = [grouped_query]
        for index, advanced_filter in enumerate(advanced_filters):
            tile_query = Bool()
            # NOTE(review): dict.iteritems() exists only on Python 2.
            for key, val in advanced_filter.iteritems():
                # Every key except 'op' is looked up as a Node primary key.
                if key != 'op':
                    node = models.Node.objects.get(pk=key)
                    # Nodes the user may not read are skipped.
                    if request.user.has_perm('read_nodegroup', node.nodegroup):
                        datatype = datatype_factory.get_instance(node.datatype)
                        datatype.append_search_filters(val, node, tile_query,
                                                       request)
            nested_query = Nested(path='tiles', query=tile_query)
            if advanced_filter['op'] == 'or' and index != 0:
                grouped_query = Bool()
                grouped_queries.append(grouped_query)
            grouped_query.must(nested_query)
        for grouped_query in grouped_queries:
            advanced_query.should(grouped_query)
        search_query.must(advanced_query)

    query.add_query(search_query)
    # Hand the buffer back as geojson rather than as the geometry object.
    if search_buffer != None:
        search_buffer = search_buffer.geojson

    return {'query': query, 'search_buffer': search_buffer}
Example #35
0
def build_search_results_dsl(request):
    """Build the search query described by the request's GET parameters.

    GET parameters read:

    * ``termFilter`` -- JSON list of term dicts. Each dict carries ``type``
      (``'term'``, ``'string'`` or ``'concept'``), ``value`` and ``inverted``.
    * ``mapFilter`` -- JSON object; only the first entry of ``features`` is
      used. Its ``properties`` may carry ``buffer`` (``width``/``unit``) and
      ``inverted``.
    * ``temporalFilter`` -- JSON object; applied only when both ``fromDate``
      and ``toDate`` are present. ``dateNodeId`` and ``inverted`` are optional.
    * ``advanced`` -- JSON list of dicts mapping node primary keys to filter
      values, plus an ``op`` key that controls how the entries are grouped.
    * ``export`` -- when present, the export page size is used.
    * ``page`` -- 1-based page number; missing or empty means page 1.

    Every nested filter is restricted to the nodegroups returned by
    ``get_permitted_nodegroups(request.user)``.

    Returns a dict with two keys: ``'query'`` (the assembled ``Query``) and
    ``'search_buffer'`` (the ``geojson`` of the buffered search geometry, or
    ``None`` when no spatial filter was supplied).
    """
    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(request.GET.get('mapFilter', '{}'))
    export = request.GET.get('export', None)
    page_param = request.GET.get('page', 1)
    page = 1 if page_param == '' else int(page_param)
    temporal_filter = JSONDeserializer().deserialize(request.GET.get('temporalFilter', '{}'))
    advanced_filters = JSONDeserializer().deserialize(request.GET.get('advanced', '[]'))
    search_buffer = None
    se = SearchEngineFactory().create()

    if export is not None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit * (page - 1), limit=limit)
    nested_agg = NestedAgg(path='points', name='geo_aggs')
    nested_agg.add_aggregation(
        GeoHashGridAgg(field='points.point', name='grid', precision=settings.HEX_BIN_PRECISION))
    nested_agg.add_aggregation(GeoBoundsAgg(field='points.point', name='bounds'))
    query.add_aggregation(nested_agg)

    search_query = Bool()
    permitted_nodegroups = get_permitted_nodegroups(request.user)

    # --- term / string / concept filters ---------------------------------
    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            if term['type'] in ('term', 'string'):
                string_filter = Bool()
                if term['type'] == 'term':
                    string_filter.must(
                        Match(field='strings.string', query=term['value'], type='phrase'))
                elif term['type'] == 'string':
                    string_filter.should(
                        Match(field='strings.string', query=term['value'], type='phrase_prefix'))
                    string_filter.should(
                        Match(field='strings.string.folded', query=term['value'], type='phrase_prefix'))

                string_filter.filter(Terms(field='strings.nodegroup_id', terms=permitted_nodegroups))
                nested_string_filter = Nested(path='strings', query=string_filter)
                if term['inverted']:
                    search_query.must_not(nested_string_filter)
                else:
                    search_query.must(nested_string_filter)
                    # The query otherwise returns hits with a score of 0;
                    # min_score is set so those are removed from the results.
                    query.min_score('0.01')
            elif term['type'] == 'concept':
                concept_ids = _get_child_concepts(term['value'])
                conceptid_filter = Bool()
                conceptid_filter.filter(Terms(field='domains.conceptid', terms=concept_ids))
                conceptid_filter.filter(Terms(field='domains.nodegroup_id', terms=permitted_nodegroups))
                nested_conceptid_filter = Nested(path='domains', query=conceptid_filter)
                if term['inverted']:
                    search_query.must_not(nested_conceptid_filter)
                else:
                    search_query.filter(nested_conceptid_filter)

    # --- spatial filter (first feature only) ------------------------------
    if 'features' in spatial_filter and spatial_filter['features']:
        feature = spatial_filter['features'][0]
        feature_geom = feature['geometry']
        feature_properties = feature['properties']
        buffer_spec = {'width': 0, 'unit': 'ft'}
        if 'buffer' in feature_properties:
            buffer_spec = feature_properties['buffer']
        search_buffer = _buffer(feature_geom, buffer_spec['width'], buffer_spec['unit'])
        # The shape actually searched is the buffered geometry, not the raw one.
        buffered_geom = JSONDeserializer().deserialize(search_buffer.json)
        geoshape = GeoShape(
            field='geometries.geom.features.geometry',
            type=buffered_geom['type'],
            coordinates=buffered_geom['coordinates'])

        invert_spatial_search = False
        if 'inverted' in feature_properties:
            invert_spatial_search = feature_properties['inverted']

        spatial_query = Bool()
        # Strict comparison kept on purpose: the flag comes from client JSON
        # and only a value equal to True inverts the search.
        if invert_spatial_search == True:  # noqa: E712
            spatial_query.must_not(geoshape)
        else:
            spatial_query.filter(geoshape)

        # Limit the match to nodegroups the user has permission to search.
        spatial_query.filter(Terms(field='geometries.nodegroup_id', terms=permitted_nodegroups))
        search_query.filter(Nested(path='geometries', query=spatial_query))

    # --- temporal filter ---------------------------------------------------
    if 'fromDate' in temporal_filter and 'toDate' in temporal_filter:
        start_date = SortableDate(temporal_filter['fromDate'])
        end_date = SortableDate(temporal_filter['toDate'])
        if 'dateNodeId' in temporal_filter and temporal_filter['dateNodeId'] != '':
            date_nodeid = str(temporal_filter['dateNodeId'])
        else:
            date_nodeid = None
        query_inverted = temporal_filter['inverted'] if 'inverted' in temporal_filter else False

        temporal_query = Bool()

        if query_inverted:
            # Inverted date searches are expressed as an OR of two ranges
            # (less than START_DATE OR greater than END_DATE) rather than
            # with must_not.
            inverted_date_query = Bool()
            inverted_date_ranges_query = Bool()

            if start_date.is_valid():
                inverted_date_query.should(
                    Range(field='dates.date', lt=start_date.as_float()))
                inverted_date_ranges_query.should(
                    Range(field='date_ranges.date_range', lt=start_date.as_float()))
            if end_date.is_valid():
                inverted_date_query.should(
                    Range(field='dates.date', gt=end_date.as_float()))
                inverted_date_ranges_query.should(
                    Range(field='date_ranges.date_range', gt=end_date.as_float()))

            date_query = Bool()
            date_query.filter(inverted_date_query)
            date_query.filter(Terms(field='dates.nodegroup_id', terms=permitted_nodegroups))
            if date_nodeid:
                date_query.filter(Term(field='dates.nodeid', term=date_nodeid))
            else:
                # Date ranges are only searched when no specific date node
                # was requested.
                date_ranges_query = Bool()
                date_ranges_query.filter(inverted_date_ranges_query)
                date_ranges_query.filter(
                    Terms(field='date_ranges.nodegroup_id', terms=permitted_nodegroups))
                temporal_query.should(Nested(path='date_ranges', query=date_ranges_query))
            temporal_query.should(Nested(path='dates', query=date_query))
        else:
            date_query = Bool()
            date_query.filter(
                Range(field='dates.date', gte=start_date.as_float(), lte=end_date.as_float()))
            date_query.filter(Terms(field='dates.nodegroup_id', terms=permitted_nodegroups))
            if date_nodeid:
                date_query.filter(Term(field='dates.nodeid', term=date_nodeid))
            else:
                # Date ranges are only searched when no specific date node
                # was requested.
                date_ranges_query = Bool()
                date_ranges_query.filter(
                    Range(field='date_ranges.date_range',
                          gte=start_date.as_float(),
                          lte=end_date.as_float(),
                          relation='intersects'))
                date_ranges_query.filter(
                    Terms(field='date_ranges.nodegroup_id', terms=permitted_nodegroups))
                temporal_query.should(Nested(path='date_ranges', query=date_ranges_query))
            temporal_query.should(Nested(path='dates', query=date_query))

        search_query.filter(temporal_query)

    # --- advanced (per-node) filters --------------------------------------
    datatype_factory = DataTypeFactory()
    if advanced_filters:
        advanced_query = Bool()
        current_group = Bool()
        groups = [current_group]
        for index, advanced_filter in enumerate(advanced_filters):
            tile_query = Bool()
            # .items() instead of .iteritems(): works on Python 2 and 3.
            for key, val in advanced_filter.items():
                if key == 'op':
                    continue
                node = models.Node.objects.get(pk=key)
                # Filters on nodegroups the user cannot read are dropped.
                if request.user.has_perm('read_nodegroup', node.nodegroup):
                    datatype = datatype_factory.get_instance(node.datatype)
                    datatype.append_search_filters(val, node, tile_query, request)
            nested_query = Nested(path='tiles', query=tile_query)
            # An 'or' entry (other than the first) starts a new group; entries
            # within a group are combined with must, groups with should.
            if advanced_filter['op'] == 'or' and index != 0:
                current_group = Bool()
                groups.append(current_group)
            current_group.must(nested_query)
        for group in groups:
            advanced_query.should(group)
        search_query.must(advanced_query)

    query.add_query(search_query)
    if search_buffer is not None:
        search_buffer = search_buffer.geojson
    return {'query': query, 'search_buffer': search_buffer}
    def append_dsl(self, search_results_object, permitted_nodegroups,
                   include_provisional):
        """Add the term/string/concept filters from the request to the query.

        The terms are read as JSON from the GET parameter named by
        ``details['componentname']``. Each term dict is expected to carry
        ``type`` (``'term'``, ``'string'`` or ``'concept'``), ``value`` and
        ``inverted``; terms of any other type are ignored.

        ``permitted_nodegroups`` restricts every nested filter.
        ``include_provisional`` may be ``False`` (provisional values are
        excluded), the string ``'only provisional'`` (non-provisional values
        are excluded), or anything else (no provisional restriction).

        The resulting bool query is added to ``search_results_object['query']``;
        nothing is returned.
        """
        # Decide once which value of the ``provisional`` flag must be excluded.
        if include_provisional is False:
            rejected_flag = 'true'
        elif include_provisional == 'only provisional':
            rejected_flag = 'false'
        else:
            rejected_flag = None

        def reject_provisional(bool_query, field):
            # Attach the must_not clause for the unwanted provisional state.
            if rejected_flag is not None:
                bool_query.must_not(
                    Match(field=field, query=rejected_flag, type='phrase'))

        combined = Bool()
        querystring_params = self.request.GET.get(details['componentname'], '')
        for term in JSONDeserializer().deserialize(querystring_params):
            kind = term['type']

            if kind in ('term', 'string'):
                strings_query = Bool()
                if kind == 'term':
                    exact = Match(field='strings.string',
                                  query=term['value'],
                                  type='phrase')
                    strings_query.must(exact)
                elif kind == 'string':
                    for string_field in ('strings.string',
                                         'strings.string.folded'):
                        strings_query.should(
                            Match(field=string_field,
                                  query=term['value'],
                                  type='phrase_prefix'))

                reject_provisional(strings_query, 'strings.provisional')

                nodegroup_terms = Terms(field='strings.nodegroup_id',
                                        terms=permitted_nodegroups)
                strings_query.filter(nodegroup_terms)
                nested_strings = Nested(path='strings', query=strings_query)

                if term['inverted']:
                    combined.must_not(nested_strings)
                    continue
                combined.must(nested_strings)
                # The query otherwise returns hits with a score of 0;
                # min_score is set so those are removed from the results.
                search_results_object['query'].min_score('0.01')

            elif kind == 'concept':
                child_concepts = _get_child_concepts(term['value'])
                domains_query = Bool()
                domains_query.filter(
                    Terms(field='domains.conceptid', terms=child_concepts))
                domains_query.filter(
                    Terms(field='domains.nodegroup_id',
                          terms=permitted_nodegroups))

                reject_provisional(domains_query, 'domains.provisional')

                nested_domains = Nested(path='domains', query=domains_query)
                if term['inverted']:
                    combined.must_not(nested_domains)
                else:
                    combined.filter(nested_domains)

        search_results_object['query'].add_query(combined)