Beispiel #1
0
 def get_relations(resourceinstanceid, start, limit):
     """Search ``resource_relations`` for relations touching a resource instance.

     A relation matches when either its ``resourceinstanceidfrom`` or its
     ``resourceinstanceidto`` field matches ``resourceinstanceid``. ``start``
     and ``limit`` are handed to the ``Query`` unchanged. The search engine is
     the free variable ``se``, which is not defined inside this block.
     """
     relation_query = Query(se, start=start, limit=limit)
     either_end = Bool()
     for id_field in ("resourceinstanceidfrom", "resourceinstanceidto"):
         either_end.should(Terms(field=id_field, terms=resourceinstanceid))
     relation_query.add_query(either_end)
     return relation_query.search(index="resource_relations")
Beispiel #2
0
 def delete_index(self):
     """Remove this object's entries from the search indexes.

     Deletes the ``concept_labels`` documents whose ``conceptid`` phrase-matches
     ``self.conceptid`` and then calls ``delete_terms`` with ``self.id``.
     """
     engine = SearchEngineFactory().create()
     label_query = Query(engine, start=0, limit=10000)
     label_query.add_query(
         Match(field='conceptid', query=self.conceptid, type='phrase'))
     label_query.delete(index='concept_labels')
     engine.delete_terms(self.id)
Beispiel #3
0
def reverse_func(apps, schema_editor):
    """Reverse step of the migration.

    Rewrites every ``Node.ontologyclass`` and ``Edge.ontologyproperty`` to the
    text after its last ``/``, deletes the ``concepts`` index documents for the
    base concept id, and finally makes a best-effort attempt to delete the
    ``identifier`` ``DValueType`` row (any ``Exception`` there is ignored).
    """
    node_model = apps.get_model("models", "Node")
    edge_model = apps.get_model("models", "Edge")

    # Same trimming rule for both models; only the attribute name differs.
    for model, attr in ((node_model, "ontologyclass"), (edge_model, "ontologyproperty")):
        for record in model.objects.all():
            setattr(record, attr, str(getattr(record, attr)).split("/")[-1])
            record.save()

    # remove index for base Arches concept
    engine = SearchEngineFactory().create()
    base_concept_query = Query(engine, start=0, limit=10000)
    base_concept_query.add_query(
        Term(field="conceptid", term="00000000-0000-0000-0000-000000000001"))
    base_concept_query.delete(index="concepts")

    try:
        value_type_model = apps.get_model("models", "DValueType")
        value_type_model.objects.get(valuetype="identifier").delete()
    except Exception:
        pass
Beispiel #4
0
def get_preflabel_from_conceptid(conceptid, lang):
    """Return the preferred-label document of a concept for a language.

    Searches the ``concept_labels`` index for ``preflabel`` documents of
    ``conceptid`` and picks one in this order of preference:

    1. a label whose language equals ``lang`` (returned immediately);
    2. the last label sharing ``lang``'s primary subtag (text before ``-``);
    3. the first label in ``settings.LANGUAGE_CODE`` seen while no candidate
       from rule 2 has been found yet;
    4. the last label returned by the search, or a dict of empty strings
       when the search returned no labels at all.
    """
    ret = None
    default = {
        "category": "",
        "conceptid": "",
        "language": "",
        "value": "",
        "type": "",
        "id": ""
    }
    se = SearchEngineFactory().create()
    query = Query(se)
    terms = Terms(field='conceptid', terms=[conceptid])
    match = Match(field='type', query='preflabel', type='phrase')
    query.add_filter(terms)
    query.add_query(match)
    preflabels = query.search(index='concept_labels')['hits']['hits']
    for preflabel in preflabels:
        label = preflabel['_source']
        # Every hit overwrites the fallback, so the last hit wins when no
        # language rule matches.
        default = label
        language = label['language']
        if language == lang:
            return label
        if language.split('-')[0] == lang.split('-')[0]:
            ret = label
        # ``is None`` (identity) is the correct test for the "no candidate
        # yet" sentinel.
        if language == settings.LANGUAGE_CODE and ret is None:
            ret = label
    return default if ret is None else ret
Beispiel #5
0
def reverse_func(apps, schema_editor):
    """Reverse step of the migration.

    Reloads the base ontology through the ``load_ontology`` management
    command, rewrites every ``Node.ontologyclass`` and
    ``Edge.ontologyproperty`` to the text after its last ``/``, deletes the
    ``concepts`` index documents for the base concept id, and finally makes a
    best-effort attempt to delete the ``identifier`` ``DValueType`` row.
    """
    extensions = [os.path.join(settings.ONTOLOGY_PATH, x) for x in settings.ONTOLOGY_EXT]
    management.call_command('load_ontology', source=os.path.join(settings.ONTOLOGY_PATH, settings.ONTOLOGY_BASE),
        version=settings.ONTOLOGY_BASE_VERSION, ontology_name=settings.ONTOLOGY_BASE_NAME, id=settings.ONTOLOGY_BASE_ID, extensions=','.join(extensions), verbosity=0)

    Node = apps.get_model("models", "Node")
    Edge = apps.get_model("models", "Edge")

    for node in Node.objects.all():
        node.ontologyclass = str(node.ontologyclass).split('/')[-1]
        node.save()

    for edge in Edge.objects.all():
        edge.ontologyproperty = str(edge.ontologyproperty).split('/')[-1]
        edge.save()

    # remove index for base Arches concept
    se = SearchEngineFactory().create()
    query = Query(se, start=0, limit=10000)
    query.add_query(Term(field='conceptid', term='00000000-0000-0000-0000-000000000001'))
    query.delete(index='concepts')

    try:
        DValueType = apps.get_model("models", "DValueType")
        DValueType.objects.get(valuetype='identifier').delete()
    except Exception:
        # Deliberately best-effort: a missing model or row must not abort the
        # reverse migration. ``Exception`` (rather than a bare ``except``)
        # still lets SystemExit / KeyboardInterrupt propagate.
        pass
Beispiel #6
0
 def delete_index(self):
     """Drop the indexed label documents and terms belonging to this object.

     Issues a delete against ``concept_labels`` for documents whose
     ``conceptid`` phrase-matches ``self.conceptid``, then asks the search
     engine to ``delete_terms`` for ``self.id``.
     """
     search_engine = SearchEngineFactory().create()
     by_concept = Query(search_engine, start=0, limit=10000)
     concept_phrase = Match(field='conceptid', query=self.conceptid, type='phrase')
     by_concept.add_query(concept_phrase)
     by_concept.delete(index='concept_labels')
     search_engine.delete_terms(self.id)
Beispiel #7
0
def search_terms(request):
    """Build suggestion entries for the ``q`` request parameter.

    For each of the ``terms`` and ``concepts`` indexes a ``limit=0`` query is
    run whose ``value_agg`` aggregation groups the matching values. A bucket
    that has ``top_concept`` sub-buckets yields one ``concept`` entry per
    concept id, labelled with the top concept's preferred label in the
    requested ``lang``; any other bucket yields a single ``term`` entry.
    When the user is not in the ``Resource Reviewer`` group, a filter on
    ``provisional`` = ``'false'`` is added for the ``terms`` index.

    Returns a ``JSONResponse`` wrapping a dict keyed by index name.
    """
    lang = request.GET.get('lang', settings.LANGUAGE_CODE)
    se = SearchEngineFactory().create()
    needle = request.GET.get('q', '').lower()
    user_is_reviewer = request.user.groups.filter(name='Resource Reviewer').exists()

    next_id = 0
    ret = {}
    for index in ('terms', 'concepts'):
        query = Query(se, start=0, limit=0)
        boolquery = Bool()
        boolquery.should(Match(field='value', query=needle, type='phrase_prefix'))
        boolquery.should(Match(field='value.folded', query=needle, type='phrase_prefix'))
        boolquery.should(Match(field='value.folded', query=needle, fuzziness='AUTO', prefix_length=settings.SEARCH_TERM_SENSITIVITY))

        if index == 'terms' and user_is_reviewer is False:
            boolquery.filter(Terms(field='provisional', terms=['false']))
        query.add_query(boolquery)

        # Aggregation objects are mutated below, so fresh ones are built for
        # every index.
        base_agg = Aggregation(name='value_agg', type='terms', field='value.raw', size=settings.SEARCH_DROPDOWN_LENGTH, order={"max_score": "desc"})
        nodegroupid_agg = Aggregation(name='nodegroupid', type='terms', field='nodegroupid')
        top_concept_agg = Aggregation(name='top_concept', type='terms', field='top_concept')
        conceptid_agg = Aggregation(name='conceptid', type='terms', field='conceptid')
        max_score_agg = MaxAgg(name='max_score', script='_score')

        top_concept_agg.add_aggregation(conceptid_agg)
        for sub_agg in (max_score_agg, top_concept_agg, nodegroupid_agg):
            base_agg.add_aggregation(sub_agg)
        query.add_aggregation(base_agg)

        suggestions = []
        ret[index] = suggestions
        results = query.search(index=index)
        for bucket in results['aggregations']['value_agg']['buckets']:
            top_concepts = bucket['top_concept']['buckets']
            if not top_concepts:
                suggestions.append({
                    'type': 'term',
                    'context': '',
                    'context_label': get_resource_model_label(bucket),
                    'id': next_id,
                    'text': bucket['key'],
                    'value': bucket['key']
                })
                next_id += 1
                continue

            for top_concept in top_concepts:
                top_concept_id = top_concept['key']
                top_concept_label = get_preflabel_from_conceptid(top_concept['key'], lang)['value']
                # All concepts under one top concept share the same id; the
                # counter only advances once per top concept.
                for concept in top_concept['conceptid']['buckets']:
                    suggestions.append({
                        'type': 'concept',
                        'context': top_concept_id,
                        'context_label': top_concept_label,
                        'id': next_id,
                        'text': bucket['key'],
                        'value': concept['key']
                    })
                next_id += 1

    return JSONResponse(ret)
Beispiel #8
0
        def get_relations(resourceinstanceid, start, limit, resourceinstance_graphid=None):
            """Search the relations index for relations touching a resource.

            A relation matches when its ``resourceinstanceidfrom`` or
            ``resourceinstanceidto`` field matches ``resourceinstanceid``.
            When ``resourceinstance_graphid`` is truthy, a ``must`` clause is
            added requiring ``resourceinstancefrom_graphid`` or
            ``resourceinstanceto_graphid`` to match it. The search engine is
            the free variable ``se``, which is not defined inside this block.
            """
            query = Query(se, start=start, limit=limit)
            relation_filter = Bool()
            for id_field in ("resourceinstanceidfrom", "resourceinstanceidto"):
                relation_filter.should(Terms(field=id_field, terms=resourceinstanceid))

            if resourceinstance_graphid:
                graph_filter = Bool()
                for graph_field in ("resourceinstancefrom_graphid", "resourceinstanceto_graphid"):
                    graph_filter.should(Terms(field=graph_field, terms=resourceinstance_graphid))
                relation_filter.must(graph_filter)

            query.add_query(relation_filter)
            return query.search(index=RESOURCE_RELATIONS_INDEX)
Beispiel #9
0
def get_preflabel_from_conceptid(conceptid, lang):
    """Look up the preferred label of ``conceptid`` best matching ``lang``.

    Queries ``concept_labels`` for ``preflabel`` documents of the concept.
    An exact language match is returned straight away. Otherwise the last
    label with the same primary language subtag wins, then the first label in
    ``settings.LANGUAGE_CODE`` encountered before any such subtag match, then
    the last label found. With no labels at all, a dict whose values are all
    empty strings is returned.
    """
    ret = None
    default = {
        "category": "",
        "conceptid": "",
        "language": "",
        "value": "",
        "type": "",
        "id": ""
    }
    se = SearchEngineFactory().create()
    query = Query(se)
    terms = Terms(field='conceptid', terms=[conceptid])
    match = Match(field='type', query='preflabel', type='phrase')
    query.add_filter(terms)
    query.add_query(match)
    preflabels = query.search(index='concept_labels')['hits']['hits']
    for preflabel in preflabels:
        source = preflabel['_source']
        default = source  # last hit seen becomes the final fallback
        # get the label in the preferred language, otherwise get the label in the default language
        if source['language'] == lang:
            return source
        if source['language'].split('-')[0] == lang.split('-')[0]:
            ret = source
        # Identity comparison is the idiomatic test for the None sentinel.
        if source['language'] == settings.LANGUAGE_CODE and ret is None:
            ret = source
    return default if ret is None else ret
Beispiel #10
0
 def get_relations(resourceinstanceid, start, limit):
     """Return the ``resource_relations`` search result for a resource instance.

     Matches relations whose ``resourceinstanceidfrom`` or
     ``resourceinstanceidto`` field matches ``resourceinstanceid`` and searches
     with ``doc_type='all'``. The search engine is the free variable ``se``,
     which is not defined inside this block.
     """
     paged_query = Query(se, start=start, limit=limit)
     from_or_to = Bool()
     for endpoint_field in ('resourceinstanceidfrom', 'resourceinstanceidto'):
         from_or_to.should(Terms(field=endpoint_field, terms=resourceinstanceid))
     paged_query.add_query(from_or_to)
     return paged_query.search(index='resource_relations', doc_type='all')
    def test_delete_by_query(self):
        """
        Deleting by query removes only the matching documents.

        Indexes ten ``prefLabel`` and ten ``altLabel`` documents into the
        ``test`` index, deletes the ``altLabel`` ones by query and expects
        ten documents to remain.

        """

        se = SearchEngineFactory().create()

        for i in range(10):
            # One prefLabel and one altLabel document per iteration; the
            # altLabel ids are offset by 100.
            for doc_id, label_type, label_value in (
                (i, 'prefLabel', 'test pref label'),
                (i + 100, 'altLabel', 'test alt label'),
            ):
                document = {
                    'id': doc_id,
                    'type': label_type,
                    'value': label_value,
                }
                se.index_data(index='test', body=document, idfield='id', refresh=True)

        time.sleep(1)

        alt_label_query = Query(se, start=0, limit=100)
        alt_label_query.add_query(Match(field='type', query='altLabel'))

        alt_label_query.delete(index='test', refresh=True)

        self.assertEqual(se.count(index='test'), 10)
Beispiel #12
0
def get_auto_filter(request):
    """Build the automatic concept term filter for ``settings.PUBLISHED_LABEL``.

    Searches the ``term`` index for the published label and, for each hit,
    resolves the preferred label of the hit's context in the requested
    ``lang``. The last hit whose context label equals
    ``settings.EW_STATUS_TERM`` and whose term equals the published label
    supplies the filter's ``value`` and ``context``; both stay empty strings
    when no hit qualifies.
    """
    lang = request.GET.get('lang', settings.LANGUAGE_CODE)
    engine = SearchEngineFactory().create()
    needle = settings.PUBLISHED_LABEL.lower()

    term_query = Query(engine, start=0, limit=settings.SEARCH_DROPDOWN_LENGTH)
    any_match = Bool()
    any_match.should(Match(field='term', query=needle, type='phrase_prefix', fuzziness='AUTO'))
    any_match.should(Match(field='term.folded', query=needle, type='phrase_prefix', fuzziness='AUTO'))
    any_match.should(Match(field='term.folded', query=needle, fuzziness='AUTO'))
    term_query.add_query(any_match)

    hits = term_query.search(index='term', doc_type='value')['hits']['hits']
    concept_id = ''
    context = ''
    for hit in hits:
        source = hit['_source']
        pref_label = get_preflabel_from_conceptid(source['context'], lang)
        source['options']['context_label'] = pref_label['value']
        if pref_label['value'] == settings.EW_STATUS_TERM and source['term'] == settings.PUBLISHED_LABEL:
            concept_id = source['options']['conceptid']
            context = source['context']

    return {
        "inverted": False,
        "type": "concept",
        "text": settings.PUBLISHED_LABEL,
        "value": concept_id,
        "context": context,
        "context_label": settings.EW_STATUS_TERM,
        "id": settings.PUBLISHED_LABEL + concept_id,
    }
Beispiel #13
0
    def delete(self, *args, **kwargs):
        """Delete this tile and its child tiles, or record a provisional delete.

        Keyword arguments consumed here (popped before delegating):
            request: optional request whose ``user`` decides the permission
                path; defaults to ``None``.
            provisional_edit_log_details: passed through to ``save_edit``.

        Child tiles in ``self.tiles`` are deleted first. If the user is a
        resource reviewer, owns the provisional edit, or no user can be read
        from the request, the tile's ``terms`` index documents are removed,
        the edit is logged, the tile row is deleted and the owning resource
        is re-indexed. Otherwise a provisional "delete" edit is applied and
        the tile is saved instead.
        """
        se = SearchEngineFactory().create()
        request = kwargs.pop("request", None)
        provisional_edit_log_details = kwargs.pop("provisional_edit_log_details", None)
        for tile in self.tiles:
            tile.delete(*args, request=request, **kwargs)
        try:
            user = request.user
            user_is_reviewer = user_is_resource_reviewer(user)
        except AttributeError:  # no user
            user = None
            user_is_reviewer = True

        if user_is_reviewer is True or self.user_owns_provisional(user):
            query = Query(se)
            bool_query = Bool()
            bool_query.filter(Terms(field="tileid", terms=[self.tileid]))
            query.add_query(bool_query)
            results = query.search(index="terms")["hits"]["hits"]

            for result in results:
                se.delete(index="terms", id=result["_id"])

            self.__preDelete(request)
            # Use the ``user`` resolved above rather than ``request.user``:
            # on the no-user path the latter raises AttributeError after the
            # index documents have already been removed.
            self.save_edit(
                user=user, edit_type="tile delete", old_value=self.data, provisional_edit_log_details=provisional_edit_log_details
            )
            super(Tile, self).delete(*args, **kwargs)
            resource = Resource.objects.get(resourceinstanceid=self.resourceinstance.resourceinstanceid)
            resource.index()

        else:
            self.apply_provisional_edit(user, data={}, action="delete")
            super(Tile, self).save(*args, **kwargs)
Beispiel #14
0
    def test_bulk_delete(self):
        """
        Bulk deleting by query removes only the matching documents.

        Indexes ten ``prefLabel`` and ten ``altLabel`` documents into the
        ``test`` index, deletes the ``altLabel`` ones by query and expects a
        remaining count of ten.

        """

        se = SearchEngineFactory().create()

        for i in range(10):
            # One prefLabel and one altLabel document per iteration; the
            # altLabel ids are offset by 100.
            for doc_id, label_type, label_value in (
                (i, 'prefLabel', 'test pref label'),
                (i + 100, 'altLabel', 'test alt label'),
            ):
                document = {
                    'id': doc_id,
                    'type': label_type,
                    'value': label_value,
                }
                se.index_data(index='test', doc_type='test', body=document, idfield='id', refresh=True)

        alt_label_query = Query(se, start=0, limit=100)
        alt_label_query.add_query(Match(field='type', query='altLabel'))

        alt_label_query.delete(index='test', refresh=True)

        remaining = se.es.count(index='test', doc_type='test')['count']
        self.assertEqual(remaining, 10)
Beispiel #15
0
    def delete(self, user={}, note=''):
        """
        Delete this resource together with its relations and indexed data.

        Removes every ``ResourceXResource`` row listed among the first 1000
        related-resource relationships, the ``strings``/``term`` documents
        for this resource instance, and the resource's own document in the
        ``resource`` index; then logs a ``delete`` edit and deletes the model
        row. ``user`` and ``note`` are accepted but not used by this body.

        """

        se = SearchEngineFactory().create()
        related = self.get_related_resources(lang="en-US", start=0, limit=1000)
        for relationship in related['resource_relationships']:
            relation_row = models.ResourceXResource.objects.get(pk=relationship['resourcexid'])
            relation_row.delete()

        term_query = Query(se)
        own_terms = Bool()
        own_terms.filter(Terms(field='resourceinstanceid', terms=[self.resourceinstanceid]))
        term_query.add_query(own_terms)
        term_hits = term_query.search(index='strings', doc_type='term')['hits']['hits']
        for hit in term_hits:
            se.delete(index='strings', doc_type='term', id=hit['_id'])

        se.delete(index='resource', doc_type=str(self.graph_id), id=self.resourceinstanceid)
        self.save_edit(edit_type='delete')
        super(Resource, self).delete()
Beispiel #16
0
    def get_related_resources(self, lang='en-US', limit=1000, start=0):
        """
        Returns an object that lists the related resources, the relationship types, and a reference to the current resource

        The returned dict has the keys ``resource_instance`` (this object),
        ``resource_relationships`` (relation documents, each annotated with a
        ``preflabel`` for its relationship type in ``lang``),
        ``related_resources`` (documents of the resources at the other end of
        those relations) and ``total`` (the hit total reported by the
        relations search). ``limit`` and ``start`` page the relations query.

        """

        ret = {
            'resource_instance': self,
            'resource_relationships': [],
            'related_resources': []
        }
        se = SearchEngineFactory().create()
        query = Query(se, limit=limit, start=start)
        bool_filter = Bool()
        bool_filter.should(Terms(field='resourceinstanceidfrom', terms=self.resourceinstanceid))
        bool_filter.should(Terms(field='resourceinstanceidto', terms=self.resourceinstanceid))
        query.add_query(bool_filter)
        resource_relations = query.search(index='resource_relations', doc_type='all')
        ret['total'] = resource_relations['hits']['total']
        instanceids = set()
        for relation in resource_relations['hits']['hits']:
            source = relation['_source']
            source['preflabel'] = get_preflabel_from_valueid(source['relationshiptype'], lang)
            ret['resource_relationships'].append(source)
            instanceids.add(source['resourceinstanceidto'])
            instanceids.add(source['resourceinstanceidfrom'])
        # Drop this resource's own id so only the other ends are fetched.
        # ``discard`` is a no-op on an empty set or a missing id, whereas
        # ``remove`` would raise KeyError if the id were not present.
        instanceids.discard(str(self.resourceinstanceid))

        related_resources = se.search(index='resource', doc_type='_all', id=list(instanceids))
        if related_resources:
            for resource in related_resources['docs']:
                ret['related_resources'].append(resource['_source'])

        return ret
Beispiel #17
0
    def delete(self, *args, **kwargs):
        """Delete this tile and its child tiles, or record a provisional delete.

        Keyword arguments consumed here (popped before delegating):
            request: optional request whose ``user`` decides the permission
                path; defaults to ``None``.
            provisional_edit_log_details: passed through to ``save_edit``.

        Child tiles in ``self.tiles`` are deleted first. If the user belongs
        to the ``Resource Reviewer`` group, owns the provisional edit, or no
        user can be read from the request, the tile's ``terms`` index
        documents are removed, the edit is logged, the tile row is deleted
        and the owning resource is re-indexed. Otherwise a provisional
        "delete" edit is applied and the tile is saved instead.
        """
        se = SearchEngineFactory().create()
        request = kwargs.pop('request', None)
        provisional_edit_log_details = kwargs.pop('provisional_edit_log_details', None)
        for tile in self.tiles:
            tile.delete(*args, request=request, **kwargs)
        try:
            user = request.user
            user_is_reviewer = request.user.groups.filter(name='Resource Reviewer').exists()
        except AttributeError: #no user
            user = None
            # Without this binding the check below raises UnboundLocalError
            # whenever the request carries no user. A call with no user is
            # treated as privileged here, which is an assumption that lets
            # the direct-delete path run — confirm it matches the intent.
            user_is_reviewer = True

        if user_is_reviewer is True or self.user_owns_provisional(user):
            query = Query(se)
            bool_query = Bool()
            bool_query.filter(Terms(field='tileid', terms=[self.tileid]))
            query.add_query(bool_query)
            results = query.search(index='terms')['hits']['hits']

            for result in results:
                se.delete(index='terms', id=result['_id'])

            self.__preDelete(request)
            # Pass the ``user`` resolved above; ``request.user`` would raise
            # AttributeError on the no-user path.
            self.save_edit(
                user=user,
                edit_type='tile delete',
                old_value=self.data,
                provisional_edit_log_details=provisional_edit_log_details)
            super(Tile, self).delete(*args, **kwargs)
            resource = Resource.objects.get(resourceinstanceid=self.resourceinstance.resourceinstanceid)
            resource.index()

        else:
            self.apply_provisional_edit(user, data={}, action='delete')
            super(Tile, self).save(*args, **kwargs)
Beispiel #18
0
 def delete_concept_values_index(concepts_to_delete):
     """Delete the indexed ``concept`` documents of each given concept.

     ``concepts_to_delete`` is a mapping whose values expose an ``id``
     attribute; for every value, the ``strings``/``concept`` documents whose
     ``conceptid`` equals that id are deleted.
     """
     se = SearchEngineFactory().create()
     # ``values()`` works on Python 2 and Python 3 mappings alike, whereas
     # ``itervalues()`` only exists on Python 2.
     for concept in concepts_to_delete.values():
         query = Query(se, start=0, limit=10000)
         term = Term(field='conceptid', term=concept.id)
         query.add_query(term)
         query.delete(index='strings', doc_type='concept')
Beispiel #19
0
 def delete_concept_values_index(concepts_to_delete):
     """Remove every given concept's documents from the ``strings`` index.

     Iterates over the values of the ``concepts_to_delete`` mapping and, for
     each, deletes the ``concept`` documents whose ``conceptid`` equals the
     value's ``id`` attribute.
     """
     se = SearchEngineFactory().create()
     # Iterate with ``values()``: ``itervalues()`` was removed in Python 3,
     # and ``values()`` is available on both major versions.
     for concept in concepts_to_delete.values():
         query = Query(se, start=0, limit=10000)
         query.add_query(Term(field='conceptid', term=concept.id))
         query.delete(index='strings', doc_type='concept')
Beispiel #20
0
 def delete_index(self):
     """Remove this object's documents from the search indexes.

     Deletes the ``concept_labels`` documents whose ``id`` term equals
     ``self.id`` and then calls ``delete_terms`` with the same id.
     """
     engine = SearchEngineFactory().create()
     by_id = Query(engine, start=0, limit=10000)
     by_id.add_query(Term(field='id', term=self.id))
     by_id.delete(index='concept_labels')
     engine.delete_terms(self.id)
Beispiel #21
0
def reverse_func(apps, schema_editor):
    """Reverse step of the migration.

    Reloads the base ontology through the ``load_ontology`` management
    command, rewrites every ``Node.ontologyclass`` and
    ``Edge.ontologyproperty`` to the text after its last ``/``, deletes the
    ``strings``/``concept`` documents for the base concept id, and finally
    makes a best-effort attempt to delete the ``identifier`` ``DValueType``
    row.
    """
    extensions = [os.path.join(settings.ONTOLOGY_PATH, x) for x in settings.ONTOLOGY_EXT]
    management.call_command('load_ontology', source=os.path.join(settings.ONTOLOGY_PATH, settings.ONTOLOGY_BASE),
        version=settings.ONTOLOGY_BASE_VERSION, ontology_name=settings.ONTOLOGY_BASE_NAME, id=settings.ONTOLOGY_BASE_ID, extensions=','.join(extensions), verbosity=0)

    Node = apps.get_model("models", "Node")
    Edge = apps.get_model("models", "Edge")

    for node in Node.objects.all():
        node.ontologyclass = str(node.ontologyclass).split('/')[-1]
        node.save()

    for edge in Edge.objects.all():
        edge.ontologyproperty = str(edge.ontologyproperty).split('/')[-1]
        edge.save()

    # remove index for base Arches concept
    se = SearchEngineFactory().create()
    query = Query(se, start=0, limit=10000)
    query.add_query(Term(field='conceptid', term='00000000-0000-0000-0000-000000000001'))
    query.delete(index='strings', doc_type='concept')

    try:
        DValueType = apps.get_model("models", "DValueType")
        DValueType.objects.get(valuetype='identifier').delete()
    except Exception:
        # Deliberately best-effort: a missing model or row must not abort the
        # reverse migration. ``Exception`` (rather than a bare ``except``)
        # still lets SystemExit / KeyboardInterrupt propagate.
        pass
def get_restricted_instances(user, search_engine=None, allresources=False):
    """Return the ids of resource instances with restricted access.

    Args:
        user: the user to check; ``is_superuser`` and ``id`` are read.
        search_engine: engine handed to ``Query`` for the per-user lookup.
        allresources: when ``True``, return every ``object_pk`` that has a
            ``no_access_to_resourceinstance`` permission for any group or
            user, read from the database instead of the search index.

    Returns:
        A list of ids. It is empty for a superuser when ``allresources`` is
        ``False``. Otherwise, for the per-user lookup, it holds the ``_id``
        of every indexed resource whose ``permissions.users_with_no_access``
        contains the user's id.
    """
    if allresources is False and user.is_superuser is True:
        return []

    if allresources is True:
        restricted_group_instances = {
            perm["object_pk"]
            for perm in GroupObjectPermission.objects.filter(permission__codename="no_access_to_resourceinstance").values("object_pk")
        }
        restricted_user_instances = {
            perm["object_pk"]
            for perm in UserObjectPermission.objects.filter(permission__codename="no_access_to_resourceinstance").values("object_pk")
        }
        all_restricted_instances = list(restricted_group_instances | restricted_user_instances)
        return all_restricted_instances
    else:
        terms = Terms(field="permissions.users_with_no_access", terms=[str(user.id)])
        query = Query(search_engine, start=0, limit=settings.SEARCH_RESULT_LIMIT)
        has_access = Bool()
        nested_term_filter = Nested(path="permissions", query=terms)
        has_access.must(nested_term_filter)
        query.add_query(has_access)
        results = query.search(index=RESOURCES_INDEX, scroll="1m")
        scroll_id = results["_scroll_id"]
        total = results["hits"]["total"]["value"]
        if total > settings.SEARCH_RESULT_LIMIT:
            # The first page is already in ``results``; scroll for the rest.
            pages = total // settings.SEARCH_RESULT_LIMIT
            for page in range(pages):
                results_scrolled = query.se.es.scroll(scroll_id=scroll_id, scroll="1m")
                # Each scroll response may carry a new scroll id, and the
                # most recent one must be used for the next request.
                scroll_id = results_scrolled.get("_scroll_id", scroll_id)
                results["hits"]["hits"] += results_scrolled["hits"]["hits"]
        restricted_ids = [res["_id"] for res in results["hits"]["hits"]]
        return restricted_ids
Beispiel #23
0
def get_preflabel_from_conceptid(conceptid, lang):
    """Return the ``prefLabel`` document of ``conceptid`` best matching ``lang``.

    Searches ``CONCEPTS_INDEX`` with the free variable ``se`` (not defined in
    this block). An exact language match is returned immediately. Otherwise
    the last label sharing ``lang``'s primary subtag wins, then the first
    label in ``settings.LANGUAGE_CODE`` seen before any subtag match, then
    the last label found. With no labels at all, a dict of empty strings is
    returned.
    """
    fallback = {
        "category": "",
        "conceptid": "",
        "language": "",
        "value": "",
        "type": "",
        "id": ""
    }
    chosen = None

    label_query = Query(se)
    criteria = Bool()
    criteria.must(Match(field="type", query="prefLabel", type="phrase"))
    criteria.filter(Terms(field="conceptid", terms=[conceptid]))
    label_query.add_query(criteria)

    for hit in label_query.search(index=CONCEPTS_INDEX)["hits"]["hits"]:
        label = hit["_source"]
        fallback = label  # the last hit seen becomes the final fallback
        language = label["language"]
        if language == lang:
            return label
        if language.split("-")[0] == lang.split("-")[0]:
            chosen = label
        if chosen is None and language == settings.LANGUAGE_CODE:
            chosen = label

    if chosen is None:
        return fallback
    return chosen
Beispiel #24
0
 def get_resource_bounds(node):
     """Return the ``bounds`` aggregation value for the node's graph, or ``None``.

     Runs a ``limit=0`` query with a ``GeoBoundsAgg`` on ``points.point``
     against the ``resource`` index, restricted to the doc type named after
     ``node.graph.pk``. ``None`` is returned when the aggregation result has
     no ``bounds`` key. The search engine is the free variable ``se``, which
     is not defined inside this block.
     """
     bounds_query = Query(se, start=0, limit=0)
     bounds_query.add_query(Bool())
     bounds_query.add_aggregation(GeoBoundsAgg(field='points.point', name='bounds'))
     results = bounds_query.search(index='resource', doc_type=[str(node.graph.pk)])
     bounds_agg = results['aggregations']['bounds']
     if 'bounds' in bounds_agg:
         return bounds_agg['bounds']
     return None
Beispiel #25
0
def search_terms(request):
    """Build suggestion entries for the ``q`` request parameter.

    Runs a ``limit=0`` query against the ``strings`` index whose
    ``value_agg`` aggregation groups the matching values. A bucket that has
    ``top_concept`` sub-buckets yields one ``concept`` entry per concept id,
    labelled with the top concept's preferred label in the requested
    ``lang``; any other bucket yields a single ``term`` entry. When the user
    is not in the ``Resource Reviewer`` group, a filter on ``provisional`` =
    ``'false'`` is added.

    Returns a ``JSONResponse`` wrapping the list of entries, which is empty
    when the search returns nothing.
    """
    lang = request.GET.get('lang', settings.LANGUAGE_CODE)
    se = SearchEngineFactory().create()
    searchString = request.GET.get('q', '')
    query = Query(se, start=0, limit=0)
    user_is_reviewer = request.user.groups.filter(name='Resource Reviewer').exists()

    boolquery = Bool()
    boolquery.should(Match(field='value', query=searchString.lower(), type='phrase_prefix', fuzziness='AUTO'))
    boolquery.should(Match(field='value.folded', query=searchString.lower(), type='phrase_prefix', fuzziness='AUTO'))
    boolquery.should(Match(field='value.folded', query=searchString.lower(), fuzziness='AUTO'))

    if user_is_reviewer is False:
        boolquery.filter(Terms(field='provisional', terms=['false']))

    query.add_query(boolquery)
    base_agg = Aggregation(name='value_agg', type='terms', field='value.raw', size=settings.SEARCH_DROPDOWN_LENGTH, order={"max_score": "desc"})
    nodegroupid_agg = Aggregation(name='nodegroupid', type='terms', field='nodegroupid')
    top_concept_agg = Aggregation(name='top_concept', type='terms', field='top_concept')
    conceptid_agg = Aggregation(name='conceptid', type='terms', field='conceptid')
    max_score_agg = MaxAgg(name='max_score', script='_score')

    top_concept_agg.add_aggregation(conceptid_agg)
    base_agg.add_aggregation(max_score_agg)
    base_agg.add_aggregation(top_concept_agg)
    base_agg.add_aggregation(nodegroupid_agg)
    query.add_aggregation(base_agg)

    results = query.search(index='strings')
    # The previous fallback dict only had a 'hits' key, so a falsy search
    # result raised KeyError on 'aggregations'. Treat it as "no buckets".
    buckets = results['aggregations']['value_agg']['buckets'] if results else []

    i = 0
    ret = []
    for result in buckets:
        if len(result['top_concept']['buckets']) > 0:
            for top_concept in result['top_concept']['buckets']:
                top_concept_id = top_concept['key']
                top_concept_label = get_preflabel_from_conceptid(top_concept['key'], lang)['value']
                # All concepts under one top concept share the same id; the
                # counter only advances once per top concept.
                for concept in top_concept['conceptid']['buckets']:
                    ret.append({
                        'type': 'concept',
                        'context': top_concept_id,
                        'context_label': top_concept_label,
                        'id': i,
                        'text': result['key'],
                        'value': concept['key']
                    })
                i = i + 1
        else:
            ret.append({
                'type': 'term',
                'context': '',
                'context_label': get_resource_model_label(result),
                'id': i,
                'text': result['key'],
                'value': result['key']
            })
            i = i + 1

    return JSONResponse(ret)
Beispiel #26
0
    def delete(self, user={}, note=""):
        """
        Deletes a single resource and any related indexed data

        Deletion is permitted when no ``user`` is supplied, when the user is
        a resource reviewer, or when the sum of the lengths of ``data``
        across the resource's tiles is zero (the code treats that as a
        provisional resource). ``note`` is accepted but not used by this
        body. The search engine is the free variable ``se``, which is not
        defined inside this block.

        Returns:
            ``True`` if the resource was deleted, ``False`` otherwise.

        Raises:
            ModelInactiveError: if the resource's graph is not active.

        """

        permit_deletion = False
        graph = models.GraphModel.objects.get(graphid=self.graph_id)
        if graph.isactive is False:
            message = _("This model is not yet active; unable to delete.")
            raise ModelInactiveError(message)
        if user != {}:
            user_is_reviewer = user_is_resource_reviewer(user)
            if user_is_reviewer is False:
                tiles = list(models.TileModel.objects.filter(resourceinstance=self))
                resource_is_provisional = sum(len(t.data) for t in tiles) == 0
                if resource_is_provisional is True:
                    permit_deletion = True
            else:
                permit_deletion = True
        else:
            permit_deletion = True

        if permit_deletion is True:
            related_resources = self.get_related_resources(lang="en-US", start=0, limit=1000, page=0)
            for rr in related_resources["resource_relationships"]:
                # delete any related resource entries, also reindex the resource that references this resource that's being deleted
                try:
                    resourceXresource = models.ResourceXResource.objects.get(pk=rr["resourcexid"])
                    resource_to_reindex = (
                        resourceXresource.resourceinstanceidfrom_id
                        if resourceXresource.resourceinstanceidto_id == self.resourceinstanceid
                        else resourceXresource.resourceinstanceidto_id
                    )
                    resourceXresource.delete(deletedResourceId=self.resourceinstanceid)
                    res = Resource.objects.get(pk=resource_to_reindex)
                    res.load_tiles()
                    res.index()
                except ObjectDoesNotExist:
                    # The relation (or the other resource) is gone from the
                    # database; remove the relation document from the index.
                    se.delete(index=RESOURCE_RELATIONS_INDEX, id=rr["resourcexid"])

            query = Query(se)
            bool_query = Bool()
            bool_query.filter(Terms(field="resourceinstanceid", terms=[self.resourceinstanceid]))
            query.add_query(bool_query)
            results = query.search(index=TERMS_INDEX)["hits"]["hits"]
            for result in results:
                se.delete(index=TERMS_INDEX, id=result["_id"])
            se.delete(index=RESOURCES_INDEX, id=self.resourceinstanceid)

            try:
                self.save_edit(edit_type="delete", user=user, note=self.displayname)
            except Exception:
                # Logging the edit is best-effort and must not block the
                # deletion. ``Exception`` (rather than a bare ``except``)
                # still lets SystemExit / KeyboardInterrupt propagate.
                pass
            super(Resource, self).delete()

        return permit_deletion
Beispiel #27
0
 def delete_concept_values_index(concepts_to_delete):
     """Delete the indexed labels and terms of each given concept.

     For every value of the ``concepts_to_delete`` mapping, the
     ``concept_labels`` documents whose ``conceptid`` equals the value's
     ``id`` are deleted, and ``delete_terms`` is called for the ``id`` of each
     entry in the value's ``values`` attribute.
     """
     se = SearchEngineFactory().create()
     # ``values()`` works on Python 2 and Python 3 mappings alike, whereas
     # ``itervalues()`` only exists on Python 2.
     for concept in concepts_to_delete.values():
         query = Query(se, start=0, limit=10000)
         term = Term(field='conceptid', term=concept.id)
         query.add_query(term)
         query.delete(index='concept_labels')
         for conceptvalue in concept.values:
             se.delete_terms(conceptvalue.id)
Beispiel #28
0
def build_search_terms_dsl(request):
    """Build (without running) the term-suggestion query for the ``q`` parameter.

    The returned ``Query`` is limited to ``settings.SEARCH_DROPDOWN_LENGTH``
    results and holds three ``should`` matches on the lower-cased search
    string: a fuzzy phrase-prefix match on ``term``, a fuzzy phrase-prefix
    match on ``term.folded``, and a plain fuzzy match on ``term.folded``.
    """
    engine = SearchEngineFactory().create()
    needle = request.GET.get('q', '').lower()
    dsl = Query(engine, start=0, limit=settings.SEARCH_DROPDOWN_LENGTH)

    any_match = Bool()
    for clause in (
        Match(field='term', query=needle, type='phrase_prefix', fuzziness='AUTO'),
        Match(field='term.folded', query=needle, type='phrase_prefix', fuzziness='AUTO'),
        Match(field='term.folded', query=needle, fuzziness='AUTO'),
    ):
        any_match.should(clause)

    dsl.add_query(any_match)
    return dsl
Beispiel #29
0
 def get_relations(resourceinstanceid, start, limit):
     """Fetch relations in which the given resource instance takes part.

     Searches ``resource_relations`` (``doc_type='all'``) for documents whose
     ``resourceinstanceidfrom`` or ``resourceinstanceidto`` field matches
     ``resourceinstanceid``, paged by ``start`` and ``limit``. The search
     engine is the free variable ``se``, which is not defined inside this
     block.
     """
     query = Query(se, limit=limit, start=start)
     participates = Bool()
     from_clause = Terms(field='resourceinstanceidfrom', terms=resourceinstanceid)
     participates.should(from_clause)
     to_clause = Terms(field='resourceinstanceidto', terms=resourceinstanceid)
     participates.should(to_clause)
     query.add_query(participates)
     return query.search(index='resource_relations', doc_type='all')
Beispiel #30
0
def build_search_terms_dsl(request):
    """Assemble the term-suggestion ``Query`` for the request's ``q`` value.

    Nothing is executed here. The query is capped at
    ``settings.SEARCH_DROPDOWN_LENGTH`` results and combines three ``should``
    clauses over the lower-cased search string: fuzzy phrase-prefix matches
    on ``term`` and ``term.folded``, plus a plain fuzzy match on
    ``term.folded``.
    """
    se = SearchEngineFactory().create()
    lowered = request.GET.get('q', '').lower()
    query = Query(se, start=0, limit=settings.SEARCH_DROPDOWN_LENGTH)

    should_clauses = Bool()
    # The first two clauses differ only in the field they target.
    for prefix_field in ('term', 'term.folded'):
        should_clauses.should(
            Match(field=prefix_field, query=lowered, type='phrase_prefix', fuzziness='AUTO'))
    should_clauses.should(Match(field='term.folded', query=lowered, fuzziness='AUTO'))

    query.add_query(should_clauses)
    return query
Beispiel #31
0
def search_terms(request):
    """
    Return the search-box suggestions for the text in the ``q`` GET parameter.

    The 'strings' index is queried with limit=0, so only aggregations are
    used: values are bucketed by ``value.raw`` (ordered by best score) and
    each bucket is sub-aggregated by top concept / concept id and nodegroup.

    GET parameters:
    q -- the text typed by the user (defaults to '')
    lang -- language used to look up the top concept label (defaults to settings.LANGUAGE_CODE)

    Returns a JSONResponse wrapping a list of dicts with the keys
    'type' ('concept' or 'term'), 'context', 'context_label', 'id', 'text' and 'value'.
    An empty list is returned when the search yields no aggregations.

    """
    lang = request.GET.get('lang', settings.LANGUAGE_CODE)
    se = SearchEngineFactory().create()
    search_string = request.GET.get('q', '').lower()
    query = Query(se, start=0, limit=0)

    boolquery = Bool()
    boolquery.should(Match(field='value', query=search_string, type='phrase_prefix', fuzziness='AUTO'))
    boolquery.should(Match(field='value.folded', query=search_string, type='phrase_prefix', fuzziness='AUTO'))
    boolquery.should(Match(field='value.folded', query=search_string, fuzziness='AUTO'))
    query.add_query(boolquery)

    base_agg = Aggregation(name='value_agg', type='terms', field='value.raw', size=settings.SEARCH_DROPDOWN_LENGTH, order={"max_score": "desc"})
    nodegroupid_agg = Aggregation(name='nodegroupid', type='terms', field='nodegroupid')
    top_concept_agg = Aggregation(name='top_concept', type='terms', field='top_concept')
    conceptid_agg = Aggregation(name='conceptid', type='terms', field='conceptid')
    max_score_agg = MaxAgg(name='max_score', script='_score')

    top_concept_agg.add_aggregation(conceptid_agg)
    base_agg.add_aggregation(max_score_agg)
    base_agg.add_aggregation(top_concept_agg)
    base_agg.add_aggregation(nodegroupid_agg)
    query.add_aggregation(base_agg)

    # The previous fallback ({'hits': {'hits': []}}) had no 'aggregations' key, so an
    # empty/None search response crashed with KeyError; walk the response defensively.
    results = query.search(index='strings') or {}
    value_buckets = results.get('aggregations', {}).get('value_agg', {}).get('buckets', [])

    i = 0
    ret = []
    for result in value_buckets:
        top_concepts = result['top_concept']['buckets']
        if len(top_concepts) > 0:
            for top_concept in top_concepts:
                top_concept_id = top_concept['key']
                top_concept_label = get_preflabel_from_conceptid(top_concept_id, lang)['value']
                for concept in top_concept['conceptid']['buckets']:
                    ret.append({
                        'type': 'concept',
                        'context': top_concept_id,
                        'context_label': top_concept_label,
                        'id': i,
                        'text': result['key'],
                        'value': concept['key']
                    })
                # the id advances once per top concept, not per concept
                i = i + 1
        else:
            ret.append({
                'type': 'term',
                'context': '',
                'context_label': '',
                'id': i,
                'text': result['key'],
                'value': result['key']
            })
            i = i + 1

    return JSONResponse(ret)
Beispiel #32
0
def index_resources_by_type(resource_types, clear_index=True, index_name=None, batch_size=settings.BULK_IMPORT_BATCH_SIZE):
    """
    Index every resource belonging to the given resource type(s).

    Arguments:
    resource_types -- iterable of graph ids that identify the resource types to index

    Keyword Arguments:
    clear_index -- when True, remove the already indexed resources of each type before reindexing
    index_name -- name of a custom index; when given only that index is refreshed
    batch_size -- number of records sent to the bulk indexer at a time (larger batches need more memory)

    Returns the status ('Passed' or 'Failed') of the last resource type that
    went through the default indexes, or '' if none did.

    """

    se = SearchEngineFactory().create()
    datatype_factory = DataTypeFactory()
    node_datatypes = dict(
        (str(nodeid), datatype)
        for nodeid, datatype in models.Node.objects.values_list('nodeid', 'datatype')
    )
    status = ''

    for resource_type in resource_types:
        started_at = datetime.now()
        graph_id = str(resource_type)
        resources = Resource.objects.filter(graph_id=graph_id)
        graph_name = models.GraphModel.objects.get(graphid=graph_id).name
        print("Indexing resource type '{0}'".format(graph_name))

        if index_name is not None:
            # only the named custom index is refreshed
            custom_index = get_index(index_name)
            custom_index.bulk_index(resources=resources, resource_type=resource_type, graph_name=graph_name, clear_index=clear_index)
            continue

        # the same query is used to clear the type and, afterwards, to count what was indexed
        type_query = Query(se=se)
        type_query.add_query(Term(field='graph_id', term=graph_id))
        if clear_index:
            type_query.delete(index='resources', refresh=True)

        with se.BulkIndexer(batch_size=batch_size, refresh=True) as doc_indexer, \
                se.BulkIndexer(batch_size=batch_size, refresh=True) as term_indexer:
            for resource in resources:
                document, terms = resource.get_documents_to_index(fetchTiles=True, datatype_factory=datatype_factory, node_datatypes=node_datatypes)
                doc_indexer.add(index='resources', id=document['resourceinstanceid'], data=document)
                for term_doc in terms:
                    term_indexer.add(index='terms', id=term_doc['_id'], data=term_doc['_source'])

        in_database = len(resources)
        indexed = se.count(index='resources', body=type_query.dsl)
        if in_database == indexed:
            status = 'Passed'
        else:
            status = 'Failed'
        elapsed = (datetime.now()-started_at).seconds
        print("Status: {0}, Resource Type: {1}, In Database: {2}, Indexed: {3}, Took: {4} seconds".format(status, graph_name, in_database, indexed, elapsed))

        for index_config in settings.ELASTICSEARCH_CUSTOM_INDEXES:
            custom_index = import_class_from_string(index_config['module'])(index_config['name'])
            custom_index.bulk_index(resources=resources, resource_type=resource_type, graph_name=graph_name, clear_index=clear_index)

    return status
Beispiel #33
0
 def get_resource_bounds(node):
     """
     Return the geo bounds aggregated over 'points.point' for the resources
     of the node's graph, or None when the aggregation holds no bounds.

     Uses the search engine ``se`` from the enclosing scope.
     """
     bounds_query = Query(se, start=0, limit=0)
     bounds_query.add_query(Bool())
     bounds_query.add_aggregation(GeoBoundsAgg(field='points.point', name='bounds'))
     response = bounds_query.search(index='resource', doc_type=[str(node.graph_id)])
     bounds_agg = response['aggregations']['bounds']
     if 'bounds' in bounds_agg:
         return bounds_agg['bounds']
     return None
Beispiel #34
0
    def bulk_index(self,
                   resources=None,
                   resource_type=None,
                   graph_name=None,
                   clear_index=True):
        """
        Indexes a list of documents in bulk to Elastic Search

        Arguments:
        None

        Keyword Arguments:
        resources -- the list of resource instances to index (None is treated as an empty list)
        resource_type -- the type of resources being indexed
        graph_name -- the name of the graph model that represents the resources being indexed
        clear_index -- True(default) to remove all index records of type "resource_type" before indexing,
            assumes that a field called "graph_id" exists on the indexed documents

        Return: None
        """

        start = datetime.now()
        if resources is None:
            # the default used to crash on len(None) further down
            resources = []

        if clear_index:
            clear_query = Query(se=self.se)
            clear_query.add_query(Term(field="graph_id", term=str(resource_type)))
            clear_query.delete(index=self.index_name, refresh=True)

        # an unfiltered query: the number of indexed documents is derived from
        # the growth of the whole index between before and after the bulk run
        q = Query(se=self.se)
        count_before = self.se.count(index=self.index_name, body=q.dsl)

        result_summary = {"database": len(resources), "indexed": 0}
        with self.se.BulkIndexer(batch_size=settings.BULK_IMPORT_BATCH_SIZE,
                                 refresh=True) as indexer:
            for resource in resources:
                tiles = list(
                    models.TileModel.objects.filter(resourceinstance=resource))
                document, doc_id = self.get_documents_to_index(resource, tiles)
                # was "id is not None", which tested the builtin id() and was always true
                if document is not None and doc_id is not None:
                    indexer.add(index=self.index_name,
                                id=doc_id,
                                data=document)

        result_summary["indexed"] = self.se.count(index=self.index_name,
                                                  body=q.dsl) - count_before
        status = "Passed" if result_summary["database"] == result_summary[
            "indexed"] else "Failed"
        print("Custom Index - %s:" % self.index_name)
        print(
            "    Status: {0}, Resource Type: {1}, In Database: {2}, Indexed: {3}, Took: {4} seconds"
            .format(status, graph_name, result_summary["database"],
                    result_summary["indexed"],
                    (datetime.now() - start).seconds))
Beispiel #35
0
    def delete(self, user={}, note=''):
        """
        Delete this resource together with its relationships and its
        entries in the 'terms' and 'resources' indexes.

        Keyword Arguments:
        user -- the user requesting the deletion; the default ({}) always permits it.
            A member of the 'Resource Reviewer' group may always delete; any other
            user may delete only when none of the resource's tiles hold data.
        note -- accepted but not used here; the edit log note is the resource's displayname

        Returns True when the resource was deleted, False when deletion was not permitted.
        Raises ModelInactiveError when the resource's graph is not active.

        """

        graph = models.GraphModel.objects.get(graphid=self.graph_id)
        if graph.isactive is False:
            raise ModelInactiveError(
                _('This model is not yet active; unable to delete.'))

        if user != {}:
            is_reviewer = user.groups.filter(
                name='Resource Reviewer').exists()
            if is_reviewer is False:
                # a resource with no tile data at all is treated as provisional
                resource_tiles = list(
                    models.TileModel.objects.filter(resourceinstance=self))
                permit_deletion = sum(
                    len(tile.data) for tile in resource_tiles) == 0
            else:
                permit_deletion = True
        else:
            permit_deletion = True

        if not permit_deletion:
            return permit_deletion

        se = SearchEngineFactory().create()
        related = self.get_related_resources(lang="en-US",
                                             start=0,
                                             limit=1000,
                                             page=0)
        for relation in related['resource_relationships']:
            models.ResourceXResource.objects.get(
                pk=relation['resourcexid']).delete()

        term_query = Query(se)
        by_resource = Bool()
        by_resource.filter(
            Terms(field='resourceinstanceid',
                  terms=[self.resourceinstanceid]))
        term_query.add_query(by_resource)
        for hit in term_query.search(index='terms')['hits']['hits']:
            se.delete(index='terms', id=hit['_id'])
        se.delete(index='resources', id=self.resourceinstanceid)

        self.save_edit(edit_type='delete',
                       user=user,
                       note=self.displayname)
        super(Resource, self).delete()

        return permit_deletion
Beispiel #36
0
 def get_resource_bounds(node):
     """
     Return the geo bounds aggregated over "points.point" for the resources
     whose graph_id equals the node's graph id, or None when the aggregation
     holds no bounds.

     Uses the search engine ``se`` from the enclosing scope.
     """
     bounds_query = Query(se, start=0, limit=0)
     bounds_query.add_query(Bool())
     bounds_query.add_aggregation(GeoBoundsAgg(field="points.point", name="bounds"))
     graph_term = Term(field="graph_id", term=str(node.graph.graphid))
     bounds_query.add_query(graph_term)
     response = bounds_query.search(index=RESOURCES_INDEX)
     bounds_agg = response["aggregations"]["bounds"]
     if "bounds" in bounds_agg:
         return bounds_agg["bounds"]
     return None
Beispiel #37
0
def get_search_range_contexts(request):
    """
    Resolve every entry of settings.RANGE_TERMS to the concept it denotes.

    Each term's text is searched in the 'term' index; the hit whose context
    pref-label equals the term's 'context_label' and whose term equals the
    term's 'text' supplies the ids. The result is cached under
    'search_range_contexts' for 86400 seconds.

    Returns a dict shaped as
        {context_key: {text_key: {'conceptid': ..., 'context': ..., 'valueid': ...}}}
    where the ids are '' for a term that had no matching hit. Consecutive
    terms sharing a 'context_label' share one inner dict.

    """
    search_range_context = cache.get('search_range_contexts')
    if search_range_context is not None:
        return search_range_context

    lang = request.GET.get('lang', request.LANGUAGE_CODE)
    se1 = SearchEngineFactory().create()
    context_label1 = '-'
    search_range_context = {}
    value = {}
    for search_term in settings.RANGE_TERMS:
        search_text = search_term['text'].lower()
        query1 = Query(se1, start=0, limit=settings.SEARCH_DROPDOWN_LENGTH)
        boolquery1 = Bool()
        boolquery1.should(Match(field='term', query=search_text, type='phrase_prefix', fuzziness='AUTO'))
        boolquery1.should(Match(field='term.folded', query=search_text, type='phrase_prefix', fuzziness='AUTO'))
        boolquery1.should(Match(field='term.folded', query=search_text, fuzziness='AUTO'))
        query1.add_query(boolquery1)
        results1 = query1.search(index='term', doc_type='value')

        conceptid1 = ''
        context1 = ''
        # reset per term: previously this was unbound on a first miss and
        # carried the previous term's value id on later misses
        valueid1 = ''
        for result1 in results1['hits']['hits']:
            source = result1['_source']
            prefLabel = get_preflabel_from_conceptid(source['context'], lang)
            source['options']['context_label'] = prefLabel['value']
            if prefLabel['value'] == search_term['context_label'] and source['term'] == search_term['text']:
                conceptid1 = source['options']['conceptid']
                context1 = source['context']
                valueid1 = source['ids'][0]

        result = {'conceptid': conceptid1, 'context': context1, 'valueid': valueid1}
        # start a fresh group whenever the context label changes
        if context_label1 != search_term['context_label']:
            value = {}
        value[search_term['text_key']] = result
        search_range_context[search_term['context_key']] = value
        context_label1 = search_term['context_label']

    cache.set('search_range_contexts', search_range_context, 86400)
    return search_range_context
Beispiel #38
0
def get_search_contexts(request):
    """
    Resolve every entry of settings.SEARCH_TERMS to the concept it denotes.

    Each term's text is searched in the 'term' index; the hit whose context
    pref-label equals the term's 'context_label' and whose term equals the
    term's 'text' supplies the ids. The result is cached under
    'search_contexts' for 86400 seconds.

    Returns a dict shaped as
        {context_key: {text_key: {'conceptid': ..., 'context': ...}}}
    where the ids are '' for a term that had no matching hit. Consecutive
    terms sharing a 'context_label' share one inner dict.

    """
    search_context = cache.get('search_contexts')
    if search_context is not None:
        return search_context

    lang = request.GET.get('lang', settings.LANGUAGE_CODE)
    se1 = SearchEngineFactory().create()
    context_label1 = '-'
    search_context = {}
    value = {}
    for search_term in settings.SEARCH_TERMS:
        searchString1 = search_term['text']
        # NOTE(review): diagnostic output kept from the original; the print
        # statement form was a SyntaxError on Python 3
        print(searchString1)
        search_text = searchString1.lower()
        query1 = Query(se1, start=0, limit=settings.SEARCH_DROPDOWN_LENGTH)
        boolquery1 = Bool()
        boolquery1.should(Match(field='term', query=search_text, type='phrase_prefix', fuzziness='AUTO'))
        boolquery1.should(Match(field='term.folded', query=search_text, type='phrase_prefix', fuzziness='AUTO'))
        boolquery1.should(Match(field='term.folded', query=search_text, fuzziness='AUTO'))
        query1.add_query(boolquery1)
        results1 = query1.search(index='term', doc_type='value')

        conceptid1 = ''
        context1 = ''
        for result1 in results1['hits']['hits']:
            source = result1['_source']
            prefLabel = get_preflabel_from_conceptid(source['context'], lang)
            source['options']['context_label'] = prefLabel['value']
            if prefLabel['value'] == search_term['context_label'] and source['term'] == search_term['text']:
                conceptid1 = source['options']['conceptid']
                context1 = source['context']

        result = {'conceptid': conceptid1, 'context': context1}
        # start a fresh group whenever the context label changes
        # ("<>" was removed in Python 3)
        if context_label1 != search_term['context_label']:
            value = {}
        print(result)
        value[search_term['text_key']] = result
        search_context[search_term['context_key']] = value
        context_label1 = search_term['context_label']

    cache.set('search_contexts', search_context, 86400)
    return search_context
Beispiel #39
0
    def delete(self, *args, **kwargs):
        """
        Delete this tile and its child tiles.

        Reviewers (and requests without a user) and the owner of a
        provisional tile delete the tile outright; for any other user the
        deletion is recorded as a provisional edit and the tile is saved.

        Keyword Arguments (consumed here, not passed on to the model delete):
        request -- the current request, used to determine the user
        index -- True (default) to also remove the tile's documents from the terms index and reindex
        transaction_id -- id recorded with the edit log entry
        provisional_edit_log_details -- details recorded with the edit log entry

        """
        se = SearchEngineFactory().create()
        request = kwargs.pop("request", None)
        index = kwargs.pop("index", True)
        # was kwargs.pop("index", None): always None, and a caller-supplied
        # transaction_id stayed in kwargs and reached super().delete()
        transaction_id = kwargs.pop("transaction_id", None)
        provisional_edit_log_details = kwargs.pop(
            "provisional_edit_log_details", None)
        for tile in self.tiles:
            tile.delete(*args, request=request, **kwargs)
        try:
            user = request.user
            user_is_reviewer = user_is_resource_reviewer(user)
        except AttributeError:  # no user
            user = None
            user_is_reviewer = True

        if user_is_reviewer is True or self.user_owns_provisional(user):
            if index:
                query = Query(se)
                bool_query = Bool()
                bool_query.filter(Terms(field="tileid", terms=[self.tileid]))
                query.add_query(bool_query)
                query.delete(index=TERMS_INDEX)

            self.__preDelete(request)
            self.save_edit(
                user=user,
                edit_type="tile delete",
                old_value=self.data,
                provisional_edit_log_details=provisional_edit_log_details,
                transaction_id=transaction_id,
            )
            try:
                super(Tile, self).delete(*args, **kwargs)
                for nodeid in self.data.keys():
                    node = models.Node.objects.get(nodeid=nodeid)
                    datatype = self.datatype_factory.get_instance(
                        node.datatype)
                    datatype.post_tile_delete(self, nodeid, index=index)
                if index:
                    self.index()
            except IntegrityError as e:
                logger.error(e)

        else:
            self.apply_provisional_edit(user, data={}, action="delete")
            super(Tile, self).save(*args, **kwargs)
def get_indexed_concepts(se, conceptid, concept_value):
    """
    Look the concept up in the 'concept_labels' index and compare the first
    hit against the database values.

    Returns 'passed' when the first hit's conceptid or value equals the
    supplied one, 'failed: concept value does not match' when neither does,
    and a 'failed: cannot find...' message when there is no hit at all.

    """

    not_found = 'failed: cannot find' + conceptid
    lookup = Query(se, start=0, limit=100)
    lookup.add_query(Match(field='conceptid', query=conceptid, type='phrase_prefix'))
    hits = lookup.search(index='concept_labels')['hits']['hits']
    if not len(hits) > 0:
        return not_found

    indexed = hits[0]['_source']
    if conceptid == indexed['conceptid'] or concept_value == indexed['value']:
        return 'passed'
    return 'failed: concept value does not match'
Beispiel #41
0
def get_indexed_concepts(se, conceptid, concept_value):
    """
    Check a database concept against the 'concept_labels' index.

    Only the first hit of a prefix match on the concept id is inspected; the
    check passes when either its conceptid or its value equals the supplied one.
    Returns 'passed' or a string starting with 'failed: '.

    """

    outcome = 'failed: cannot find' + conceptid
    concept_query = Query(se, start=0, limit=100)
    by_conceptid = Match(field='conceptid', query=conceptid, type='phrase_prefix')
    concept_query.add_query(by_conceptid)
    response = concept_query.search(index='concept_labels')
    found = response['hits']['hits']
    if len(found) > 0:
        first = found[0]['_source']
        same_id = conceptid == first['conceptid']
        if same_id or concept_value == first['value']:
            outcome = 'passed'
        else:
            outcome = 'failed: concept value does not match'
    return outcome
Beispiel #42
0
    def delete(self):
        """
        Remove this resource along with its relationships and indexed data.

        Deletes the ResourceXResource rows reported by get_related_resources
        (requested with limit=15), the resource's 'term' documents in the
        'strings' index, its document in the 'resource' index, and finally
        the database record itself.

        """

        se = SearchEngineFactory().create()
        related = self.get_related_resources(lang="en-US", start=0, limit=15)
        for relation in related['resource_relationships']:
            models.ResourceXResource.objects.get(pk=relation['resourcexid']).delete()

        by_resource = Bool()
        by_resource.filter(Terms(field='resourceinstanceid', terms=[self.resourceinstanceid]))
        term_query = Query(se)
        term_query.add_query(by_resource)
        for hit in term_query.search(index='strings', doc_type='term')['hits']['hits']:
            se.delete(index='strings', doc_type='term', id=hit['_id'])

        se.delete(index='resource', doc_type=str(self.graph_id), id=self.resourceinstanceid)
        super(Resource, self).delete()
Beispiel #43
0
    def delete(self, *args, **kwargs):
        """
        Delete this tile and its child tiles, drop the tile's 'term'
        documents from the 'strings' index and reindex the owning resource.

        Keyword Arguments:
        request -- the current request (consumed here and handed to child tiles and the pre-delete hook)

        """
        se = SearchEngineFactory().create()
        request = kwargs.pop('request', None)
        for child_tiles in self.tiles.itervalues():
            for child in child_tiles:
                child.delete(*args, request=request, **kwargs)

        by_tile = Bool()
        by_tile.filter(Terms(field='tileid', terms=[self.tileid]))
        term_query = Query(se)
        term_query.add_query(by_tile)
        for hit in term_query.search(index='strings', doc_type='term')['hits']['hits']:
            se.delete(index='strings', doc_type='term', id=hit['_id'])

        self.__preDelete(request)
        super(Tile, self).delete(*args, **kwargs)
        owner_id = self.resourceinstance.resourceinstanceid
        Resource.objects.get(resourceinstanceid=owner_id).index()
Beispiel #44
0
def get_preflabel_from_conceptid(conceptid, lang):
    """
    Return the indexed preferred label of a concept, choosing the language by
    priority:

    1. a label whose language equals ``lang`` (returned immediately)
    2. the last label sharing the primary language subtag with ``lang``, or a
       label in settings.LANGUAGE_CODE if nothing was selected before it
    3. the last label found
    4. a dict of empty strings when the concept has no preferred label

    """
    engine = SearchEngineFactory().create()
    label_query = Query(engine)
    label_query.add_filter(Terms(field="conceptid", terms=[conceptid]))
    label_query.add_query(Match(field="type", query="preflabel", type="phrase"))
    hits = label_query.search(index="concept_labels")["hits"]["hits"]

    fallback = {"category": "", "conceptid": "", "language": "", "value": "", "type": "", "id": ""}
    chosen = None
    for hit in hits:
        label = hit["_source"]
        fallback = label
        label_language = label["language"]
        if label_language == lang:
            return label
        if label_language.split("-")[0] == lang.split("-")[0]:
            chosen = label
        if label_language == settings.LANGUAGE_CODE and chosen is None:
            chosen = label

    if chosen is None:
        return fallback
    return chosen
Beispiel #45
0
 def delete_index(self):
     """
     Delete the 'concept' documents in the 'strings' index whose ``id``
     field equals this object's id.
     """
     engine = SearchEngineFactory().create()
     by_id = Query(engine, start=0, limit=10000)
     by_id.add_query(Term(field='id', term=self.id))
     by_id.delete(index='strings', doc_type='concept')
Beispiel #46
0
def build_search_results_dsl(request):
    """
    Build (but do not run) the resource search query described by the
    request's GET parameters.

    GET parameters read:
    termFilter -- JSON list of term objects with 'type' ('term', 'concept' or 'string'), 'value' and 'inverted' keys
    mapFilter -- JSON object with a 'features' list; only the first feature is used, its
        properties may carry a 'buffer' ({'width', 'unit'}) and an 'inverted' flag
    temporalFilter -- JSON object with 'fromDate' and 'toDate' and optionally 'dateNodeId' and 'inverted'
    export -- when present, settings.SEARCH_EXPORT_ITEMS_PER_PAGE is used as page size
    page -- 1-based page number (defaults to 1)

    Returns the paged Query holding a geohash grid and a geo bounds
    aggregation plus one Bool combining all requested filters.

    """
    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(request.GET.get('mapFilter', '{}'))
    export = request.GET.get('export', None)
    page = 1 if request.GET.get('page') == '' else int(request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(request.GET.get('temporalFilter', '{}'))

    se = SearchEngineFactory().create()

    if export is not None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit*int(page-1), limit=limit)
    query.add_aggregation(GeoHashGridAgg(field='points', name='grid', precision=settings.HEX_BIN_PRECISION))
    query.add_aggregation(GeoBoundsAgg(field='points', name='bounds'))
    search_query = Bool()

    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            if term['type'] == 'term':
                clause = Match(field='strings', query=term['value'], type='phrase')
            elif term['type'] == 'concept':
                concept_ids = _get_child_concepts(term['value'])
                clause = Terms(field='domains.conceptid', terms=concept_ids)
            elif term['type'] == 'string':
                clause = Bool()
                clause.should(Match(field='strings', query=term['value'], type='phrase_prefix'))
                clause.should(Match(field='strings.folded', query=term['value'], type='phrase_prefix'))
            else:
                # unknown term types are ignored
                continue

            if term['inverted']:
                search_query.must_not(clause)
            else:
                search_query.must(clause)

    if 'features' in spatial_filter:
        if len(spatial_filter['features']) > 0:
            feature_geom = spatial_filter['features'][0]['geometry']
            feature_properties = spatial_filter['features'][0]['properties']
            buffer_spec = {'width':0,'unit':'ft'}
            if 'buffer' in feature_properties:
                buffer_spec = feature_properties['buffer']
            feature_geom = JSONDeserializer().deserialize(_buffer(feature_geom,buffer_spec['width'],buffer_spec['unit']).json)
            geoshape = GeoShape(field='geometries.features.geometry', type=feature_geom['type'], coordinates=feature_geom['coordinates'] )

            invert_spatial_search = False
            if 'inverted' in feature_properties:
                invert_spatial_search = feature_properties['inverted']

            # compared against True on purpose: the flag comes straight from JSON
            if invert_spatial_search == True:
                search_query.must_not(geoshape)
            else:
                search_query.must(geoshape)

    if 'fromDate' in temporal_filter and 'toDate' in temporal_filter:
        start_date = None
        end_date = None
        start_year = 'null'
        end_year = 'null'
        # An unparseable date leaves that side of the range open (None / 'null').
        try:
            sd = FlexiDate.from_str(temporal_filter['fromDate'])
            # fractional year -> milliseconds relative to 1970
            # (31556952 = seconds in an average Gregorian year)
            start_date = int((sd.as_float()-1970)*31556952*1000)
            start_year = sd.year
        except Exception:
            pass

        try:
            ed = FlexiDate.from_str(temporal_filter['toDate'])
            end_date = int((ed.as_float()-1970)*31556952*1000)
            end_year = ed.year
        except Exception:
            pass

        # add filter for concepts that define min or max dates
        sql = None
        basesql = """
            SELECT value.conceptid
            FROM (
                SELECT
                    {select_clause},
                    v.conceptid
                FROM
                    public."values" v,
                    public."values" v2
                WHERE
                    v.conceptid = v2.conceptid and
                    v.valuetype = 'min_year' and
                    v2.valuetype = 'max_year'
            ) as value
            WHERE overlap = true;
        """

        temporal_query = Bool()

        if 'inverted' not in temporal_filter:
            temporal_filter['inverted'] = False

        has_date_node = 'dateNodeId' in temporal_filter and temporal_filter['dateNodeId'] != ''

        if temporal_filter['inverted']:
            # inverted date searches need to use an OR clause and are generally more complicated to structure (can't use ES must_not)
            # eg: less than START_DATE OR greater than END_DATE
            overlap_clauses = []
            inverted_date_filter = Bool()

            field = 'dates'
            if has_date_node:
                field='tiles.data.%s' % (temporal_filter['dateNodeId'])

            if start_date is not None:
                inverted_date_filter.should(Range(field=field, lte=start_date))
                overlap_clauses.append("(numrange(v.value::int, v2.value::int, '[]') && numrange(null,{start_year},'[]'))")
            if end_date is not None:
                inverted_date_filter.should(Range(field=field, gte=end_date))
                overlap_clauses.append("(numrange(v.value::int, v2.value::int, '[]') && numrange({end_year},null,'[]'))")

            if has_date_node:
                date_range_query = Nested(path='tiles', query=inverted_date_filter)
                temporal_query.should(date_range_query)
            else:
                temporal_query.should(inverted_date_filter)

                # with no usable date there is nothing to select; formatting the
                # statement anyway produced malformed SQL (" as overlap" alone)
                if overlap_clauses:
                    select_clause = " or ".join(overlap_clauses) + " as overlap"
                    sql = basesql.format(select_clause=select_clause).format(start_year=start_year, end_year=end_year)

        else:
            if has_date_node:
                node_date_range = Range(field='tiles.data.%s' % (temporal_filter['dateNodeId']), gte=start_date, lte=end_date)
                date_range_query = Nested(path='tiles', query=node_date_range)
                temporal_query.should(date_range_query)
            else:
                date_range_query = Range(field='dates', gte=start_date, lte=end_date)
                temporal_query.should(date_range_query)

                select_clause = """
                    numrange(v.value::int, v2.value::int, '[]') && numrange({start_year},{end_year},'[]') as overlap
                """
                sql = basesql.format(select_clause=select_clause).format(start_year=start_year, end_year=end_year)

        # the concept lookup only applies when a dateNodeId is not specified
        # NOTE(review): the years are formatted straight into the statement; they come
        # from FlexiDate parsing of request input — confirm they can only be numeric
        if sql is not None:
            cursor = connection.cursor()
            cursor.execute(sql)
            dated_concept_ids = [str(row[0]) for row in cursor.fetchall()]

            if len(dated_concept_ids) > 0:
                conceptid_filter = Terms(field='domains.conceptid', terms=dated_concept_ids)
                temporal_query.should(conceptid_filter)

        search_query.must(temporal_query)

    query.add_query(search_query)
    return query
Beispiel #47
0
    def prepare_documents_for_search_index(self):
        """
        Generates a list of specialized resource based documents to support resource search

        """
        # Arches_hip
        documents = super(Resource, self).prepare_documents_for_search_index()
        for document in documents:
            document['date_groups'] = []
            for nodes in self.get_nodes('BEGINNING_OF_EXISTENCE.E63', keys=['value']):
                document['date_groups'].append({
                    'conceptid': nodes['BEGINNING_OF_EXISTENCE_TYPE_E55__value'],
                    'value': nodes['START_DATE_OF_EXISTENCE_E49__value']
                })

            for nodes in self.get_nodes('END_OF_EXISTENCE.E64', keys=['value']):
                document['date_groups'].append({
                    'conceptid': nodes['END_OF_EXISTENCE_TYPE_E55__value'],
                    'value': nodes['END_DATE_OF_EXISTENCE_E49__value']
                })

            for nodes in self.get_nodes('GRAVE_MEASUREMENT_TYPE.E55', keys=['value','label']):
                # Poiscemo in shranimo le contextid (sicer je v vsakem jeziku drugacna vrednost)
                lang = settings.LANGUAGE_CODE
                se1 = SearchEngineFactory().create()
                context_label1 = '-'
                search_context = {}
                #print 'Iscem podatke za ' + nodes['GRAVE_MEASUREMENT_TYPE_E55__value']
                searchString1 = nodes['GRAVE_MEASUREMENT_TYPE_E55__label']
                query1 = Query(se1, start=0, limit=settings.SEARCH_DROPDOWN_LENGTH)
                boolquery1 = Bool()
                boolquery1.should(Match(field='term', query=searchString1.lower(), type='phrase_prefix', fuzziness='AUTO'))
                boolquery1.should(Match(field='term.folded', query=searchString1.lower(), type='phrase_prefix', fuzziness='AUTO'))
                boolquery1.should(Match(field='term.folded', query=searchString1.lower(), fuzziness='AUTO'))
                query1.add_query(boolquery1)
                results1 = query1.search(index='term', doc_type='value')
                conceptid1 = ''
                context1 = ''
                data_type = nodes['GRAVE_MEASUREMENT_TYPE_E55__value']
                for result1 in results1['hits']['hits']:
                    #print result1result1['_source']['ids'][0]
                    conceptid1 = result1['_source']['options']
                    valueid1 = result1['_source']['ids'][0]
                    if nodes['GRAVE_MEASUREMENT_TYPE_E55__value'] == valueid1:
                        #print 'Nasel: ' + conceptid1['conceptid']
                        data_type = conceptid1['conceptid']
                document['value_' + data_type] = float(nodes['VALUE_OF_MEASUREMENT_E60__value'])

            for nodes in self.get_nodes('OBJECT_MEASUREMENT_TYPE.E55', keys=['value','label']):
                # Poiscemo in shranimo le contextid (sicer je v vsakem jeziku drugacna vrednost)
                lang = settings.LANGUAGE_CODE
                se1 = SearchEngineFactory().create()
                context_label1 = '-'
                search_context = {}
                #print 'Iscem podatke za ' + nodes['GRAVE_MEASUREMENT_TYPE_E55__value']
                searchString1 = nodes['OBJECT_MEASUREMENT_TYPE_E55__label']
                query1 = Query(se1, start=0, limit=settings.SEARCH_DROPDOWN_LENGTH)
                boolquery1 = Bool()
                boolquery1.should(Match(field='term', query=searchString1.lower(), type='phrase_prefix', fuzziness='AUTO'))
                boolquery1.should(Match(field='term.folded', query=searchString1.lower(), type='phrase_prefix', fuzziness='AUTO'))
                boolquery1.should(Match(field='term.folded', query=searchString1.lower(), fuzziness='AUTO'))
                query1.add_query(boolquery1)
                results1 = query1.search(index='term', doc_type='value')
                conceptid1 = ''
                context1 = ''
                data_type = nodes['OBJECT_MEASUREMENT_TYPE_E55__value']
                for result1 in results1['hits']['hits']:
                    #print result1result1['_source']['ids'][0]
                    conceptid1 = result1['_source']['options']
                    valueid1 = result1['_source']['ids'][0]
                    if nodes['OBJECT_MEASUREMENT_TYPE_E55__value'] == valueid1:
                        #print 'Nasel: ' + conceptid1['conceptid']
                        data_type = conceptid1['conceptid']
                document['value_' + data_type] = float(nodes['VALUE_OF_MEASUREMENT_E60__value'])
            #print document

            #for nodes in self.get_nodes('GRAVE_MEASUREMENT_TYPE.E55', keys=['value']):
            #    document['measurement_groups'].append({
            #        'conceptid': nodes['GRAVE_MEASUREMENT_TYPE_E55__value'],
            #        'value': nodes['VALUE_OF_MEASUREMENT_E60__value']
            #    })

            #for nodes in self.get_nodes('OBJECT_MEASUREMENT_TYPE.E55', keys=['value']):
            #    document['measurement_groups'].append({
            #        'conceptid': nodes['OBJECT_MEASUREMENT_TYPE_E55__value'],
            #        'value': nodes['VALUE_OF_MEASUREMENT_E60__value']
            #    })
            
            if self.entitytypeid == 'HERITAGE_RESOURCE.E18' or self.entitytypeid == 'SITE.E18' or self.entitytypeid == 'GRAVE.E18' or self.entitytypeid == 'OBJECT.E18':
                document['searchType'] = self.get_current_type()
                #document['parentName'] = self.get_parent_name()
                    
                #document_data['designations'] = get_entity_data('TYPE_OF_DESIGNATION_OR_PROTECTION.E55', get_label=True)
                if self.get_nodes('SPATIAL_COORDINATES_GEOMETRY.E47', keys=['value']):
                    point = self.get_nodes('SPATIAL_COORDINATES_GEOMETRY.E47', keys=['value'])[0]['SPATIAL_COORDINATES_GEOMETRY_E47__value']
                    if not isinstance(point, basestring):
                        point = str(point)
                    if point.find('POINT')>=0:
                        lon = point[6:point.find(' ', 7)]
                        #print lon
                        lat = point[point.find(' ',7)+1:point.find(')')]
                        #print lat
                        document['longitude'] = lon
                        document['latitude'] = lat  
        
        return documents
Beispiel #48
0
def build_search_results_dsl(request):
    """
    Build the search query described by the GET parameters of ``request``.

    Parameters read from ``request.GET``:
        termFilter     -- JSON list of term dicts; each is read for ``type``
                          ('term', 'string' or 'concept'), ``value`` and ``inverted``
        mapFilter      -- JSON object; only the first entry of ``features`` is used,
                          together with its optional ``buffer`` and ``inverted`` properties
        temporalFilter -- JSON object with ``fromDate`` and ``toDate`` and the optional
                          keys ``dateNodeId`` and ``inverted``
        advanced       -- JSON list of dicts mapping node ids to filter values, each
                          with an additional ``op`` key
        page, export   -- control the offset and the page size of the query

    Every clause is restricted to the nodegroups returned by
    ``get_permitted_nodegroups(request.user)``; advanced filters are only
    applied for nodes the user has the 'read_nodegroup' permission on.

    Returns a dict with two keys: ``query`` (the assembled, not yet executed
    Query) and ``search_buffer`` (the ``geojson`` attribute of the buffered
    search geometry, or None when no spatial filter was applied).
    """
    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(request.GET.get('mapFilter', '{}'))
    export = request.GET.get('export', None)
    # a missing or empty page parameter means the first page
    page_param = request.GET.get('page', 1)
    page = 1 if page_param == '' else int(page_param)
    temporal_filter = JSONDeserializer().deserialize(request.GET.get('temporalFilter', '{}'))
    advanced_filters = JSONDeserializer().deserialize(request.GET.get('advanced', '[]'))
    search_buffer = None
    se = SearchEngineFactory().create()

    if export is not None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit * (page - 1), limit=limit)
    nested_agg = NestedAgg(path='points', name='geo_aggs')
    nested_agg.add_aggregation(GeoHashGridAgg(field='points.point', name='grid', precision=settings.HEX_BIN_PRECISION))
    nested_agg.add_aggregation(GeoBoundsAgg(field='points.point', name='bounds'))
    query.add_aggregation(nested_agg)

    search_query = Bool()
    permitted_nodegroups = get_permitted_nodegroups(request.user)

    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            term_type = term['type']
            if term_type in ('term', 'string'):
                string_filter = Bool()
                if term_type == 'term':
                    string_filter.must(Match(field='strings.string', query=term['value'], type='phrase'))
                else:
                    string_filter.should(Match(field='strings.string', query=term['value'], type='phrase_prefix'))
                    string_filter.should(Match(field='strings.string.folded', query=term['value'], type='phrase_prefix'))

                string_filter.filter(Terms(field='strings.nodegroup_id', terms=permitted_nodegroups))
                nested_string_filter = Nested(path='strings', query=string_filter)
                if term['inverted']:
                    search_query.must_not(nested_string_filter)
                else:
                    search_query.must(nested_string_filter)
                    # need to set min_score because the query returns results with score 0 and those
                    # have to be removed, which I don't think it should be doing
                    query.min_score('0.01')
            elif term_type == 'concept':
                concept_ids = _get_child_concepts(term['value'])
                conceptid_filter = Bool()
                conceptid_filter.filter(Terms(field='domains.conceptid', terms=concept_ids))
                conceptid_filter.filter(Terms(field='domains.nodegroup_id', terms=permitted_nodegroups))
                nested_conceptid_filter = Nested(path='domains', query=conceptid_filter)
                if term['inverted']:
                    search_query.must_not(nested_conceptid_filter)
                else:
                    search_query.filter(nested_conceptid_filter)

    if 'features' in spatial_filter and spatial_filter['features']:
        # only the first feature of the collection is searched
        feature = spatial_filter['features'][0]
        feature_geom = feature['geometry']
        feature_properties = feature['properties']
        buffer_spec = feature_properties.get('buffer', {'width': 0, 'unit': 'ft'})
        search_buffer = _buffer(feature_geom, buffer_spec['width'], buffer_spec['unit'])
        feature_geom = JSONDeserializer().deserialize(search_buffer.json)
        geoshape = GeoShape(
            field='geometries.geom.features.geometry',
            type=feature_geom['type'],
            coordinates=feature_geom['coordinates'],
        )

        invert_spatial_search = feature_properties.get('inverted', False)

        spatial_query = Bool()
        # kept as an equality test on purpose: only a real true (or 1) inverts the
        # search, other truthy values such as non-empty strings do not
        if invert_spatial_search == True:  # noqa: E712
            spatial_query.must_not(geoshape)
        else:
            spatial_query.filter(geoshape)

        # get the nodegroup_ids that the user has permission to search
        spatial_query.filter(Terms(field='geometries.nodegroup_id', terms=permitted_nodegroups))
        search_query.filter(Nested(path='geometries', query=spatial_query))

    if 'fromDate' in temporal_filter and 'toDate' in temporal_filter:
        start_date = SortableDate(temporal_filter['fromDate'])
        end_date = SortableDate(temporal_filter['toDate'])
        date_nodeid = None
        if 'dateNodeId' in temporal_filter and temporal_filter['dateNodeId'] != '':
            date_nodeid = str(temporal_filter['dateNodeId'])
        query_inverted = temporal_filter.get('inverted', False)

        temporal_query = Bool()

        if query_inverted:
            # inverted date searches need to use an OR clause and are generally more
            # complicated to structure (can't use ES must_not)
            # eg: less than START_DATE OR greater than END_DATE
            inverted_date_query = Bool()
            inverted_date_ranges_query = Bool()

            if start_date.is_valid():
                inverted_date_query.should(Range(field='dates.date', lt=start_date.as_float()))
                inverted_date_ranges_query.should(Range(field='date_ranges.date_range', lt=start_date.as_float()))
            if end_date.is_valid():
                inverted_date_query.should(Range(field='dates.date', gt=end_date.as_float()))
                inverted_date_ranges_query.should(Range(field='date_ranges.date_range', gt=end_date.as_float()))

            date_clause = inverted_date_query
        else:
            date_clause = Range(field='dates.date', gte=start_date.as_float(), lte=end_date.as_float())

        date_query = Bool()
        date_query.filter(date_clause)
        date_query.filter(Terms(field='dates.nodegroup_id', terms=permitted_nodegroups))
        if date_nodeid:
            # a specific date node was requested: only single dates of that node are searched
            date_query.filter(Term(field='dates.nodeid', term=date_nodeid))
        else:
            # no specific node: date ranges are searched in addition to single dates
            if query_inverted:
                date_ranges_clause = inverted_date_ranges_query
            else:
                date_ranges_clause = Range(
                    field='date_ranges.date_range',
                    gte=start_date.as_float(),
                    lte=end_date.as_float(),
                    relation='intersects',
                )
            date_ranges_query = Bool()
            date_ranges_query.filter(date_ranges_clause)
            date_ranges_query.filter(Terms(field='date_ranges.nodegroup_id', terms=permitted_nodegroups))
            temporal_query.should(Nested(path='date_ranges', query=date_ranges_query))
        temporal_query.should(Nested(path='dates', query=date_query))

        search_query.filter(temporal_query)

    datatype_factory = DataTypeFactory()
    if advanced_filters:
        advanced_query = Bool()
        # filters are AND-ed within a group; an 'or' operator (except on the
        # first filter) starts a new group and the groups are OR-ed together
        grouped_query = Bool()
        grouped_queries = [grouped_query]
        for index, advanced_filter in enumerate(advanced_filters):
            tile_query = Bool()
            for key, val in advanced_filter.items():
                if key == 'op':
                    continue
                node = models.Node.objects.get(pk=key)
                if request.user.has_perm('read_nodegroup', node.nodegroup):
                    datatype = datatype_factory.get_instance(node.datatype)
                    datatype.append_search_filters(val, node, tile_query, request)
            nested_query = Nested(path='tiles', query=tile_query)
            if advanced_filter['op'] == 'or' and index != 0:
                grouped_query = Bool()
                grouped_queries.append(grouped_query)
            grouped_query.must(nested_query)
        for grouped_query in grouped_queries:
            advanced_query.should(grouped_query)
        search_query.must(advanced_query)

    query.add_query(search_query)
    if search_buffer is not None:
        search_buffer = search_buffer.geojson
    return {'query': query, 'search_buffer': search_buffer}
Beispiel #49
0
def build_search_results_dsl(request):
    """
    Build the search Query described by the GET parameters of ``request``.

    Parameters read from ``request.GET``:
        termFilter     -- JSON list of term dicts with the keys ``type``
                          ('term', 'concept' or 'string'), ``value``, ``inverted``
                          and, for 'term', ``context``
        spatialFilter  -- JSON object with ``geometry`` and the optional keys
                          ``buffer`` (required for non-bbox geometries) and ``inverted``
        temporalFilter -- JSON object with a two-element ``year_min_max`` list
                          and the optional key ``inverted``
        page, export   -- control the offset and the page size of the query

    'term' and 'concept' terms and the spatial and temporal clauses are added
    to the filter of the query; 'string' terms are added to the query itself.
    A missing ``spatialFilter`` or ``temporalFilter`` parameter is treated as
    an empty filter.

    Returns the assembled Query; it is not executed here.
    """
    def add_clause(target, clause, inverted):
        """Attach ``clause`` to ``target`` as must_not when inverted, otherwise as must."""
        if inverted:
            target.must_not(clause)
        else:
            target.must(clause)

    term_filter = request.GET.get('termFilter', '')
    # default to an empty JSON object so the membership tests below also work
    # when the parameter is absent
    spatial_filter = JSONDeserializer().deserialize(request.GET.get('spatialFilter', '{}'))
    export = request.GET.get('export', None)
    # a missing or empty page parameter means the first page
    page_param = request.GET.get('page', 1)
    page = 1 if page_param == '' else int(page_param)
    temporal_filter = JSONDeserializer().deserialize(request.GET.get('temporalFilter', '{}'))

    se = SearchEngineFactory().create()

    if export is not None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit * (page - 1), limit=limit)
    boolquery = Bool()
    boolfilter = Bool()

    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            term_type = term['type']
            if term_type == 'term':
                entitytype = models.EntityTypes.objects.get(conceptid_id=term['context'])
                term_clause = Bool()
                term_clause.must(Terms(field='child_entities.entitytypeid', terms=[entitytype.pk]))
                term_clause.must(Match(field='child_entities.value', query=term['value'], type='phrase'))
                add_clause(boolfilter, Nested(path='child_entities', query=term_clause), term['inverted'])
            elif term_type == 'concept':
                concept_ids = _get_child_concepts(term['value'])
                concept_clause = Terms(field='domains.conceptid', terms=concept_ids)
                add_clause(boolfilter, Nested(path='domains', query=concept_clause), term['inverted'])
            elif term_type == 'string':
                string_clause = Bool()
                string_clause.should(Match(field='child_entities.value', query=term['value'], type='phrase_prefix'))
                string_clause.should(Match(field='child_entities.value.folded', query=term['value'], type='phrase_prefix'))
                # free-text matches go into the query rather than the filter
                add_clause(boolquery, Nested(path='child_entities', query=string_clause), term['inverted'])

    if 'geometry' in spatial_filter and 'type' in spatial_filter['geometry'] and spatial_filter['geometry']['type'] != '':
        geojson = spatial_filter['geometry']
        if geojson['type'] == 'bbox':
            # reorder the four bbox values into the two corner points of an envelope
            bounds = geojson['coordinates']
            shape_type = 'envelope'
            coordinates = [[bounds[0], bounds[3]], [bounds[2], bounds[1]]]
        else:
            buffer_spec = spatial_filter['buffer']
            geojson = JSONDeserializer().deserialize(_buffer(geojson, buffer_spec['width'], buffer_spec['unit']).json)
            shape_type = geojson['type']
            coordinates = geojson['coordinates']

        geoshape = GeoShape(field='geometries.value', type=shape_type, coordinates=coordinates)
        add_clause(boolfilter, Nested(path='geometries', query=geoshape), spatial_filter.get('inverted', False))

    if 'year_min_max' in temporal_filter and len(temporal_filter['year_min_max']) == 2:
        first_year, last_year = temporal_filter['year_min_max']
        # the filter covers whole years: 1 January of the first to 31 December of the last
        start_date = date(first_year, 1, 1).isoformat()
        end_date = date(last_year, 12, 31).isoformat()
        date_range = Range(field='dates.value', gte=start_date, lte=end_date)
        add_clause(boolfilter, Nested(path='dates', query=date_range), temporal_filter.get('inverted', False))

    if not boolquery.empty:
        query.add_query(boolquery)

    if not boolfilter.empty:
        query.add_filter(boolfilter)

    return query
Beispiel #50
0
def search(request):
    """
    Search concept labels by prefix and return the hits as a JSONResponse.

    GET parameters:
        q              -- (required) text matched, lower-cased, as a phrase
                          prefix against the ``value`` field
        removechildren -- (optional) concept id; hits for that concept and for
                          the concepts returned by ``get_child_concepts`` for it
                          are dropped

    Every kept hit gets an ``in_scheme_name`` key holding the prefLabel value
    found in the index for its ``top_concept``; the key is left out when the
    index has no prefLabel for that concept.
    """
    se = SearchEngineFactory().create()
    searchString = request.GET['q']
    removechildren = request.GET.get('removechildren', None)
    query = Query(se, start=0, limit=100)
    query.add_query(Match(field='value', query=searchString.lower(), type='phrase_prefix'))
    results = query.search(index='strings', doc_type='concept')

    # a set, because membership is tested once per hit
    excluded_ids = set()
    if removechildren is not None:
        excluded_ids.update(
            concept[0] for concept in Concept().get_child_concepts(removechildren, columns="conceptidto::text")
        )
        excluded_ids.add(removechildren)

    # NOTE: resolving the scheme name through the database
    # (Concept().get(..., include_parentconcepts=True).get_paths()) was tried
    # here before and is slow, hence the index lookup below.
    newresults = []
    # top concept id -> list of its prefLabel values; an empty list records a
    # failed lookup so it is not repeated for every hit of the same scheme
    cached_scheme_names = {}
    for result in results['hits']['hits']:
        if result['_source']['conceptid'] in excluded_ids:
            continue

        top_concept = result['_source']['top_concept']
        if top_concept not in cached_scheme_names:
            scheme_query = Query(se, start=0, limit=100)
            scheme_query.add_query(Match(field='conceptid', query=top_concept, type='phrase'))
            scheme = scheme_query.search(index='strings', doc_type='concept')
            cached_scheme_names[top_concept] = [
                label['_source']['value']
                for label in scheme['hits']['hits']
                if label['_source']['type'] == 'prefLabel'
            ]

        pref_labels = cached_scheme_names[top_concept]
        if pref_labels:
            # when several prefLabels exist the last one returned is used
            result['in_scheme_name'] = pref_labels[-1]

        newresults.append(result)

    results['hits']['hits'] = newresults
    return JSONResponse(results)
Beispiel #51
0
def search(request):
    """
    Search concept labels by prefix and return the hits as a JSONResponse.

    GET parameters:
        q              -- (required) text matched, lower-cased, as a phrase
                          prefix against the ``value`` field
        removechildren -- (optional) concept id; hits for every concept visited
                          when traversing that concept with its subconcepts
                          are dropped

    Every kept hit gets an ``in_scheme_name`` key holding the prefLabel value
    found in the index for the concept id stored in the hit's ``_type``; the
    key is left out when the index has no prefLabel for it.
    """
    se = SearchEngineFactory().create()
    searchString = request.GET["q"]
    removechildren = request.GET.get("removechildren", None)
    query = Query(se, start=0, limit=100)
    query.add_query(Match(field="value", query=searchString.lower(), type="phrase_prefix"))
    results = query.search(index="concept_labels")

    # a set, because membership is tested once per hit
    excluded_ids = set()
    if removechildren is not None:
        concepts = Concept().get(id=removechildren, include_subconcepts=True, include=None)

        def collect_id(concept):
            excluded_ids.add(concept.id)

        concepts.traverse(collect_id)

    # NOTE: resolving the scheme name through the database
    # (Concept().get(..., include_parentconcepts=True).get_paths()) was tried
    # here before and is slow, hence the index lookup below.
    newresults = []
    # scheme id -> list of its prefLabel values; an empty list records a failed
    # lookup so it is not repeated for every hit of the same scheme
    cached_scheme_names = {}
    for result in results["hits"]["hits"]:
        if result["_source"]["conceptid"] in excluded_ids:
            continue

        scheme_id = result["_type"]
        if scheme_id not in cached_scheme_names:
            scheme_query = Query(se, start=0, limit=100)
            scheme_query.add_query(Match(field="conceptid", query=scheme_id, type="phrase"))
            scheme = scheme_query.search(index="concept_labels")
            cached_scheme_names[scheme_id] = [
                label["_source"]["value"]
                for label in scheme["hits"]["hits"]
                if label["_source"]["type"] == "prefLabel"
            ]

        pref_labels = cached_scheme_names[scheme_id]
        if pref_labels:
            # when several prefLabels exist the last one returned is used
            result["in_scheme_name"] = pref_labels[-1]

        newresults.append(result)

    results["hits"]["hits"] = newresults
    return JSONResponse(results)
Beispiel #52
0
def build_base_search_results_dsl(request):
    """
    Build the search Query described by the GET parameters of ``request``.

    Parameters read from ``request.GET``:
        termFilter     -- JSON list of term dicts with the keys ``type``
                          ('term', 'concept' or 'string'), ``value``, ``inverted``
                          and, for 'term', ``context``
        spatialFilter  -- JSON object with ``geometry`` and the optional keys
                          ``buffer`` (required for non-bbox geometries) and ``inverted``
        temporalFilter -- JSON object with a two-element ``year_min_max`` list
                          and the optional key ``inverted``
        page, export   -- control the offset and the page size of the query

    When a term filter is given and the requesting user's username is
    'anonymous', the term returned by ``get_auto_filter(request)`` is appended
    to the term filter before it is evaluated.

    'term' and 'concept' terms and the spatial and temporal clauses are added
    to the filter of the query; 'string' terms are added to the query itself.
    A missing ``spatialFilter`` or ``temporalFilter`` parameter is treated as
    an empty filter.

    Returns the assembled Query; it is not executed here.
    """
    def add_clause(target, clause, inverted):
        """Attach ``clause`` to ``target`` as must_not when inverted, otherwise as must."""
        if inverted:
            target.must_not(clause)
        else:
            target.must(clause)

    term_filter = request.GET.get('termFilter', '')
    # default to an empty JSON object so the membership tests below also work
    # when the parameter is absent
    spatial_filter = JSONDeserializer().deserialize(request.GET.get('spatialFilter', '{}'))
    export = request.GET.get('export', None)
    # a missing or empty page parameter means the first page
    page_param = request.GET.get('page', 1)
    page = 1 if page_param == '' else int(page_param)
    temporal_filter = JSONDeserializer().deserialize(request.GET.get('temporalFilter', '{}'))

    se = SearchEngineFactory().create()

    if export is not None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit * (page - 1), limit=limit)
    boolquery = Bool()
    boolfilter = Bool()

    terms = []
    if term_filter != '':
        terms = JSONDeserializer().deserialize(term_filter)
        # If the user is not authenticated, show only valid resources (the original
        # author noted this probably needs more work, e.g. showing users only their own).
        # NOTE(review): the restriction is only added when a term filter is present;
        # an anonymous request with an empty termFilter is not restricted -- confirm
        # that this is intended.
        if request.user.username == 'anonymous':
            restricted_terms = list(terms)
            # look up the concept id and context of the Published status (per the original comment)
            restricted_terms.append(get_auto_filter(request))
            # round-trip through the serializer, as before, so the appended term
            # has the same shape as the deserialized ones
            terms = JSONDeserializer().deserialize(JSONSerializer().serialize(restricted_terms))

    for term in terms:
        term_type = term['type']
        if term_type == 'term':
            entitytype = models.EntityTypes.objects.get(conceptid_id=term['context'])
            term_clause = Bool()
            term_clause.must(Terms(field='child_entities.entitytypeid', terms=[entitytype.pk]))
            term_clause.must(Match(field='child_entities.value', query=term['value'], type='phrase'))
            add_clause(boolfilter, Nested(path='child_entities', query=term_clause), term['inverted'])
        elif term_type == 'concept':
            concept_ids = _get_child_concepts(term['value'])
            concept_clause = Terms(field='domains.conceptid', terms=concept_ids)
            add_clause(boolfilter, Nested(path='domains', query=concept_clause), term['inverted'])
        elif term_type == 'string':
            string_clause = Bool()
            string_clause.should(Match(field='child_entities.value', query=term['value'], type='phrase_prefix'))
            string_clause.should(Match(field='child_entities.value.folded', query=term['value'], type='phrase_prefix'))
            # free-text matches go into the query rather than the filter
            add_clause(boolquery, Nested(path='child_entities', query=string_clause), term['inverted'])

    if 'geometry' in spatial_filter and 'type' in spatial_filter['geometry'] and spatial_filter['geometry']['type'] != '':
        geojson = spatial_filter['geometry']
        if geojson['type'] == 'bbox':
            # reorder the four bbox values into the two corner points of an envelope
            bounds = geojson['coordinates']
            shape_type = 'envelope'
            coordinates = [[bounds[0], bounds[3]], [bounds[2], bounds[1]]]
        else:
            buffer_spec = spatial_filter['buffer']
            geojson = JSONDeserializer().deserialize(_buffer(geojson, buffer_spec['width'], buffer_spec['unit']).json)
            shape_type = geojson['type']
            coordinates = geojson['coordinates']

        geoshape = GeoShape(field='geometries.value', type=shape_type, coordinates=coordinates)
        add_clause(boolfilter, Nested(path='geometries', query=geoshape), spatial_filter.get('inverted', False))

    if 'year_min_max' in temporal_filter and len(temporal_filter['year_min_max']) == 2:
        first_year, last_year = temporal_filter['year_min_max']
        # the filter covers whole years: 1 January of the first to 31 December of the last
        start_date = date(first_year, 1, 1).isoformat()
        end_date = date(last_year, 12, 31).isoformat()
        date_range = Range(field='dates.value', gte=start_date, lte=end_date)
        add_clause(boolfilter, Nested(path='dates', query=date_range), temporal_filter.get('inverted', False))

    if not boolquery.empty:
        query.add_query(boolquery)

    if not boolfilter.empty:
        query.add_filter(boolfilter)

    return query
Beispiel #53
0
def build_search_results_dsl(request):
    """Build the search-engine query for the search results page.

    The query is assembled from these ``request.GET`` parameters:

    * ``termFilter`` -- JSON list of "select boxes"; each box is a list of
      term dicts read via their ``type`` (``concept``, ``term`` or
      ``string``), ``value``, ``context`` and ``inverted`` keys.
    * ``termFilterGroup`` -- JSON list, one entry per select box: either
      ``'No group'`` or an id handed to ``Entity.get_mapping_schema_to``.
    * ``termFilterAndOr`` -- JSON list, one entry per select box; ``'or'``
      makes the (non-inverted) terms of an ungrouped box optional, anything
      else makes them required / excluded.
    * ``termFilterCombineWithPrev`` -- JSON list of flags; the first entry is
      a dummy and is dropped, the remaining ones say whether a select box is
      bracketed together with the one before it.
    * ``booleanSearch`` -- ``'or'`` combines the bracketed groups with OR,
      anything else with AND. Boxes inside one bracket are combined with the
      opposite operator.
    * ``temporalFilter`` -- JSON list, one dict per select box, with optional
      ``year_min_max``, ``filters`` and ``inverted`` keys.
    * ``spatialFilter`` -- JSON dict with optional ``geometry``, ``buffer``
      and ``inverted`` keys.
    * ``export`` -- when present the export page size is used.
    * ``page`` -- 1-based page number; missing or empty means page 1.

    Returns the populated ``Query`` object, sorted ascending by the label of
    the ``EAMENA_ID.E42`` child entity.
    """
    root_entitytypeid = 'HERITAGE_RESOURCE_GROUP.E27'

    # Sort by EAMENA_ID.E42, which is the sorting criterion since 06/09/16
    # (AZ 10/08/16: revisit once the automatic resource ID is finished).
    sorting = {
        "child_entities.label": {
            "order": "asc",
            "nested_path": "child_entities",
            "nested_filter": {
                "term": {"child_entities.entitytypeid": "EAMENA_ID.E42"}
            }
        }
    }

    def year_range_iso(year_min_max):
        """Return ISO dates for 1 January of the first and 31 December of the second year."""
        first_day = date(year_min_max[0], 1, 1)
        last_day = date(year_min_max[1], 12, 31)
        return first_day.isoformat(), last_day.isoformat()

    def read_date_filter(filter_item):
        """Return ``(operator, iso_datetime)`` extracted from a temporal filter item's nodes."""
        operator = ''
        searchdate = ''
        for node in filter_item['nodes']:
            if node['entitytypeid'] == 'DATE_COMPARISON_OPERATOR.E55':
                operator = node['value']
            elif node['entitytypeid'] == 'date':
                searchdate = node['value']
        return operator, datetime.strptime(searchdate, '%Y-%m-%d').isoformat()

    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(request.GET.get('spatialFilter', None))
    export = request.GET.get('export', None)
    page = int(request.GET.get('page') or 1)
    temporal_filter = JSONDeserializer().deserialize(request.GET.get('temporalFilter', None))
    boolean_search = request.GET.get('booleanSearch', '')
    filter_and_or = JSONDeserializer().deserialize(request.GET.get('termFilterAndOr', ''))
    filter_grouping = JSONDeserializer().deserialize(request.GET.get('termFilterGroup', ''))
    # Ignore the first entry as it is a dummy.
    filter_combine_flags = JSONDeserializer().deserialize(request.GET.get('termFilterCombineWithPrev', ''))[1:]

    se = SearchEngineFactory().create()

    if export is not None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit * (page - 1), limit=limit)
    boolquery = Bool()
    boolfilter = Bool()

    # One Bool per select box; they are combined afterwards according to the
    # global and/or and the optional bracketing flags.
    terms_queries = []

    if term_filter != '':
        for index, select_box in enumerate(JSONDeserializer().deserialize(term_filter)):
            selectbox_boolfilter = Bool()
            groupid = filter_grouping[index]

            if groupid != 'No group':
                # Grouped box: build one nested query against the nested
                # entities by tracing every term from the group root.
                term_paths = []
                # Reset per box so a path can never leak in from an earlier box.
                term_path = None

                for term in select_box:
                    if term['type'] == 'concept':
                        # The parent concept of the value is the field it belongs to.
                        term_parent_concept = Concept.get_parent_concept(term['value'])
                        parent_nodetype = term_parent_concept.nodetype.nodetype
                        if parent_nodetype == 'Collection':
                            term_schema = Entity.get_mapping_schema_to(term_parent_concept.legacyoid)
                        elif parent_nodetype == 'Concept':
                            # Concepts are hierarchical: step up one more level
                            # to reach the collection that owns the mapping.
                            parent_relations_to = models.ConceptRelations.objects.filter(conceptidto=term_parent_concept.conceptid, relationtype='member')
                            grandparent = models.Concepts.objects.filter(conceptid=parent_relations_to[0].conceptidfrom)
                            term_schema = Entity.get_mapping_schema_to(grandparent[0].legacyoid)
                        # NOTE(review): any other nodetype leaves term_schema
                        # unset (or carried over from a previous term), exactly
                        # as before -- confirm no other nodetype can occur here.
                    elif term['type'] == 'term':
                        concept = models.Concepts.objects.get(conceptid=term['context'])
                        term_schema = Entity.get_mapping_schema_to(concept.legacyoid)
                    elif term['type'] == 'string':
                        term_schema = Entity.get_mapping_schema_to(groupid)
                    else:
                        # Unknown term types contribute nothing.
                        continue

                    # Path from the root down to the node in question.
                    term_path = term_schema[root_entitytypeid]['steps']
                    term_paths.append({
                        'term': term,
                        'path': term_path
                    })

                box_temporal = temporal_filter[index]
                temporal_inverted = bool(box_temporal['inverted']) if 'inverted' in box_temporal else False

                if 'year_min_max' in box_temporal and len(box_temporal['year_min_max']) == 2:
                    start_date, end_date = year_range_iso(box_temporal['year_min_max'])
                    if term_path is None:
                        # No term supplied a path for this box: fall back to
                        # the group's own path instead of failing on an
                        # unbound (or stale) variable.
                        term_path = Entity.get_mapping_schema_to(groupid)[root_entitytypeid]['steps']

                    term_paths.append({
                        'term': {
                            'date_operator': '3',
                            'start_date': start_date,
                            'end_date': end_date,
                            'type': 'date',
                            'inverted': temporal_inverted
                        },
                        'path': term_path
                    })

                if 'filters' in box_temporal:
                    term_schema = Entity.get_mapping_schema_to(groupid)
                    term_path = term_schema[root_entitytypeid]['steps']

                    for temporal_filter_item in box_temporal['filters']:
                        date_operator, date_value = read_date_filter(temporal_filter_item)
                        term_paths.append({
                            'term': {
                                'date_operator': date_operator,
                                'date_value': date_value,
                                'type': 'date',
                                'inverted': temporal_inverted
                            },
                            'path': term_path
                        })

                # Combine the traced paths into a nested query and require it.
                group_query = nested_query_from_pathed_values(term_paths, 'nested_entity.child_entities')
                selectbox_boolfilter.must(group_query)

            else:
                box_is_or = None
                for term in select_box:
                    if term['type'] == 'term':
                        entitytype = models.EntityTypes.objects.get(conceptid_id=term['context'])
                        entity_match = Bool()
                        entity_match.must(Terms(field='child_entities.entitytypeid', terms=[entitytype.pk]))
                        entity_match.must(Match(field='child_entities.value', query=term['value'], type='phrase'))
                        clause = Nested(path='child_entities', query=entity_match)

                    elif term['type'] == 'concept':
                        concept_ids = _get_child_concepts(term['value'])
                        clause = Nested(path='domains', query=Terms(field='domains.conceptid', terms=concept_ids))

                    elif term['type'] == 'string':
                        # Free-text strings are indexed under child_entities.
                        free_text_match = Bool()
                        free_text_match.should(Match(field='child_entities.value', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        free_text_match.should(Match(field='child_entities.value.folded', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        free_text_match.should(Match(field='child_entities.value.folded', query=term['value'], fuzziness='AUTO', operator='and'))
                        # Controlled-vocabulary concepts are indexed under domains.
                        domain_label_match = Bool()
                        domain_label_match.should(Match(field='domains.label', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        domain_label_match.should(Match(field='domains.label.folded', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        domain_label_match.should(Match(field='domains.label.folded', query=term['value'], fuzziness='AUTO', operator='and'))
                        # Either location may match. This is a filter-style
                        # Bool so it can be OR-ed with the other box filters.
                        clause = Bool()
                        clause.should(Nested(path='child_entities', query=free_text_match))
                        clause.should(Nested(path='domains', query=domain_label_match))

                    else:
                        # Unknown term types contribute nothing.
                        continue

                    box_is_or = filter_and_or[index] == 'or'
                    if box_is_or:
                        # Inverted terms are ignored in an OR box.
                        if not term['inverted']:
                            selectbox_boolfilter.should(clause)
                    elif term['inverted']:
                        selectbox_boolfilter.must_not(clause)
                    else:
                        selectbox_boolfilter.must(clause)

                box_temporal = temporal_filter[index]
                temporal_inverted = bool(box_temporal['inverted']) if 'inverted' in box_temporal else False

                if 'year_min_max' in box_temporal and len(box_temporal['year_min_max']) == 2:
                    start_date, end_date = year_range_iso(box_temporal['year_min_max'])
                    date_range = Range(field='dates.value', gte=start_date, lte=end_date)
                    nested_dates = Nested(path='dates', query=date_range)
                    if temporal_inverted:
                        selectbox_boolfilter.must_not(nested_dates)
                    else:
                        selectbox_boolfilter.must(nested_dates)

                if 'filters' in box_temporal:
                    for temporal_filter_item in box_temporal['filters']:
                        date_operator, date_value = read_date_filter(temporal_filter_item)

                        # NOTE(review): the operator codes are kept exactly as
                        # they were; the previous comments labelled '0' as
                        # "greater than" and '2' as "less than", which is the
                        # opposite of the ranges built here -- confirm the
                        # intended encoding with the UI.
                        if date_operator == '1':
                            date_range = Range(field='dates.value', gte=date_value, lte=date_value)
                        elif date_operator == '0':
                            date_range = Range(field='dates.value', lt=date_value)
                        elif date_operator == '2':
                            date_range = Range(field='dates.value', gt=date_value)
                        else:
                            # Unrecognised operator: skip rather than reuse a
                            # range left over from an earlier filter.
                            continue

                        nested_dates = Nested(path='dates', query=date_range)
                        if temporal_inverted:
                            selectbox_boolfilter.must_not(nested_dates)
                        else:
                            selectbox_boolfilter.must(nested_dates)

            terms_queries.append(selectbox_boolfilter)

        # Bracket consecutive select boxes together according to the combine
        # flags. [[A, B], [C], [D, E]] becomes
        #    (A || B) && C && (D || E)   for a global AND, or
        #    (A && B) || C || (D && E)   for a global OR.
        bool_components = []
        for i, term_query in enumerate(terms_queries):
            if i > 0 and filter_combine_flags[i - 1]:
                bool_components[-1].append(term_query)
            else:
                bool_components.append([term_query])

        global_or = boolean_search == 'or'
        for bool_component in bool_components:
            if len(bool_component) == 1:
                # A lone box is used as-is.
                component_query = bool_component[0]
            else:
                component_query = Bool()
                for sub_component in bool_component:
                    # Inside a bracket apply the OPPOSITE of the global operator.
                    if global_or:
                        component_query.must(sub_component)
                    else:
                        component_query.should(sub_component)

            # Brackets themselves are joined with the global operator.
            if global_or:
                boolfilter.should(component_query)
            else:
                boolfilter.must(component_query)

    if 'geometry' in spatial_filter and 'type' in spatial_filter['geometry'] and spatial_filter['geometry']['type'] != '':
        geojson = spatial_filter['geometry']
        if geojson['type'] == 'bbox':
            bbox = geojson['coordinates']
            # Reorder the four bbox values into the two corner points of an envelope.
            coordinates = [[bbox[0], bbox[3]], [bbox[2], bbox[1]]]
            geoshape = GeoShape(field='geometries.value', type='envelope', coordinates=coordinates)
        else:
            buffer_spec = spatial_filter['buffer']
            geojson = JSONDeserializer().deserialize(_buffer(geojson, buffer_spec['width'], buffer_spec['unit']).json)
            geoshape = GeoShape(field='geometries.value', type=geojson['type'], coordinates=geojson['coordinates'])
        nested_geometry = Nested(path='geometries', query=geoshape)

        if spatial_filter.get('inverted', False):
            boolfilter.must_not(nested_geometry)
        else:
            boolfilter.must(nested_geometry)

    if not boolquery.empty:
        query.add_query(boolquery)

    if not boolfilter.empty:
        query.add_filter(boolfilter)

    # Sorting criterion added to query (AZ 10/08/16)
    query.dsl.update({'sort': sorting})

    return query