Beispiel #1
0
def get_related_resources(resourceid, lang, limit=1000, start=0):
    """Return the relationships of a resource and the resources they link to.

    Queries the 'resource_relations' index for relations in which
    ``resourceid`` appears as ``entityid1`` or ``entityid2``, attaches a
    'preflabel' to each relation via ``get_preflabel_from_valueid`` and then
    fetches the documents at the other end of those relations from the
    'entity' index.

    Args:
        resourceid: id of the resource whose relations are wanted.
        lang: language code handed to ``get_preflabel_from_valueid``.
        limit: ``limit`` passed to the relations ``Query``.
        start: ``start`` passed to the relations ``Query``.

    Returns:
        dict with the keys 'resource_relationships' (the ``_source`` of each
        relation hit, with an added 'preflabel' entry), 'related_resources'
        (the ``_source`` of each related entity document) and 'total' (the
        hit total reported by the relations query).
    """
    ret = {
        'resource_relationships': [],
        'related_resources': []
    }
    se = SearchEngineFactory().create()

    query = Query(se, limit=limit, start=start)
    query.add_filter(Terms(field='entityid1', terms=resourceid).dsl, operator='or')
    query.add_filter(Terms(field='entityid2', terms=resourceid).dsl, operator='or')
    resource_relations = query.search(index='resource_relations', doc_type='all')
    ret['total'] = resource_relations['hits']['total']

    entityids = set()
    for relation in resource_relations['hits']['hits']:
        source = relation['_source']
        source['preflabel'] = get_preflabel_from_valueid(source['relationshiptype'], lang)
        ret['resource_relationships'].append(source)
        entityids.add(source['entityid1'])
        entityids.add(source['entityid2'])

    # discard() instead of remove(): if no hit carries resourceid verbatim the
    # lookup must not be aborted by a KeyError.
    entityids.discard(resourceid)

    # Only ask the entity index for documents when there is something to
    # fetch; an empty id list can never yield related resources.
    if entityids:
        related_resources = se.search(index='entity', doc_type='_all', id=list(entityids))
        if related_resources:
            for resource in related_resources['docs']:
                ret['related_resources'].append(resource['_source'])

    return ret
Beispiel #2
0
def get_preflabel_from_conceptid(conceptid, lang):
    """Return the preferred label of a concept, favouring language ``lang``.

    Searches the 'concept_labels' index for labels of ``conceptid`` whose
    type matches 'preflabel' and picks one by descending priority:

    1. a label whose language equals ``lang`` (returned immediately);
    2. the last label sharing the part of ``lang`` before the first '-';
    3. the first label whose language equals ``settings.LANGUAGE_CODE``;
    4. the last label returned by the search;
    5. a placeholder dict with empty-string values when nothing was found.

    Args:
        conceptid: id of the concept whose label is wanted.
        lang: requested language code, e.g. 'en-US'.

    Returns:
        The ``_source`` dict of the selected label, or the placeholder dict.
    """
    ret = None
    default = {
        "category": "",
        "conceptid": "",
        "language": "",
        "value": "",
        "type": "",
        "id": ""
    }
    se = SearchEngineFactory().create()
    query = Query(se)
    terms = Terms(field='conceptid', terms=[conceptid])
    match = Match(field='type', query='preflabel', type='phrase')
    query.add_filter(terms)
    query.add_query(match)
    preflabels = query.search(index='concept_labels')['hits']['hits']
    for preflabel in preflabels:
        label = preflabel['_source']
        # Remember the most recent label as the fallback of last resort.
        default = label
        label_language = label['language']
        # An exact language match wins outright.
        if label_language == lang:
            return label
        # Same language family (e.g. 'en-GB' for 'en-US') beats the site default.
        if label_language.split('-')[0] == lang.split('-')[0]:
            ret = label
        # The site default language is used only while nothing better is known.
        if label_language == settings.LANGUAGE_CODE and ret is None:
            ret = label
    return default if ret is None else ret
Beispiel #3
0
def get_preflabel_from_conceptid(conceptid, lang):
    """Return a label of a concept, favouring language ``lang``.

    Searches the 'concept_labels' index for every label of ``conceptid`` and
    picks one by descending priority:

    1. a label whose language equals ``lang`` (returned immediately);
    2. the last label sharing the part of ``lang`` before the first '-';
    3. the last label returned by the search;
    4. a placeholder dict with empty-string values when nothing was found.

    Args:
        conceptid: id of the concept whose label is wanted.
        lang: requested language code, e.g. 'en-US'.

    Returns:
        The ``_source`` dict of the selected label, or the placeholder dict.
    """
    ret = None
    default = {
        "category": "",
        "conceptid": "",
        "language": "",
        "value": "",
        "type": "",
        "id": ""
    }
    se = SearchEngineFactory().create()
    query = Query(se)
    terms = Terms(field='conceptid', terms=[conceptid])
    query.add_filter(terms)
    # NOTE: results are deliberately not restricted to type 'prefLabel'
    # because the Arabic labels are currently indexed as altLabels. Only after
    # ElasticSearch has been reindexed, re-enable:
    #     match = Match(field='type', query='prefLabel', type='phrase')
    #     query.add_query(match)

    preflabels = query.search(index='concept_labels')['hits']['hits']
    for preflabel in preflabels:
        label = preflabel['_source']
        # Remember the most recent label as the fallback of last resort.
        default = label
        # An exact language match wins outright.
        if label['language'] == lang:
            return label
        # Otherwise keep a label from the same language family.
        if label['language'].split('-')[0] == lang.split('-')[0]:
            ret = label
        # (A further "language == lang" check used to follow here; it could
        # never succeed because the exact match above has already returned.)
    return default if ret is None else ret
Beispiel #4
0
def get_related_resources(resourceid, lang, limit=1000, start=0):
    """Collect the relations of ``resourceid`` and the entities they reach.

    Relations are read from the 'resource_relations' index (matching
    ``resourceid`` against ``entityid1`` or ``entityid2``); each one receives
    a 'preflabel' from ``get_preflabel_from_valueid``. The entities at the
    other end of the relations are then fetched from the 'entity' index.

    Args:
        resourceid: id of the resource whose relations are wanted.
        lang: language code handed to ``get_preflabel_from_valueid``.
        limit: ``limit`` passed to the relations ``Query``.
        start: ``start`` passed to the relations ``Query``.

    Returns:
        dict with the keys 'resource_relationships', 'related_resources' and
        'total' (the hit total reported by the relations query).
    """
    ret = {'resource_relationships': [], 'related_resources': []}
    se = SearchEngineFactory().create()

    query = Query(se, limit=limit, start=start)
    query.add_filter(Terms(field='entityid1', terms=resourceid).dsl,
                     operator='or')
    query.add_filter(Terms(field='entityid2', terms=resourceid).dsl,
                     operator='or')
    resource_relations = query.search(index='resource_relations',
                                      doc_type='all')
    ret['total'] = resource_relations['hits']['total']

    entityids = set()
    for relation in resource_relations['hits']['hits']:
        source = relation['_source']
        source['preflabel'] = get_preflabel_from_valueid(
            source['relationshiptype'], lang)
        ret['resource_relationships'].append(source)
        entityids.add(source['entityid1'])
        entityids.add(source['entityid2'])

    # The resource itself is not one of its own related resources. discard()
    # is used so a missing id cannot raise KeyError the way remove() would.
    entityids.discard(resourceid)

    # Without any ids there is nothing to fetch, so skip the entity lookup.
    if entityids:
        related_resources = se.search(index='entity',
                                      doc_type='_all',
                                      id=list(entityids))
        if related_resources:
            ret['related_resources'].extend(
                resource['_source'] for resource in related_resources['docs'])

    return ret
Beispiel #5
0
def get_preflabel_from_conceptid(conceptid, lang):
    """Return a label of a concept, favouring language ``lang``.

    All labels of ``conceptid`` are read from the 'concept_labels' index and
    one is chosen by descending priority:

    1. a label whose language equals ``lang`` (returned immediately);
    2. the last label sharing the part of ``lang`` before the first '-';
    3. the last label returned by the search;
    4. a placeholder dict with empty-string values when nothing was found.

    Args:
        conceptid: id of the concept whose label is wanted.
        lang: requested language code, e.g. 'en-US'.

    Returns:
        The ``_source`` dict of the selected label, or the placeholder dict.
    """
    ret = None
    default = {
        "category": "",
        "conceptid": "",
        "language": "",
        "value": "",
        "type": "",
        "id": ""
    }
    se = SearchEngineFactory().create()
    query = Query(se)
    terms = Terms(field='conceptid', terms=[conceptid])
    query.add_filter(terms)
    # NOTE: the query is deliberately not narrowed to type 'prefLabel' because
    # the Arabic labels are currently indexed as altLabels. Only after
    # ElasticSearch has been reindexed, re-enable:
    #     match = Match(field='type', query='prefLabel', type='phrase')
    #     query.add_query(match)

    preflabels = query.search(index='concept_labels')['hits']['hits']
    for preflabel in preflabels:
        label = preflabel['_source']
        label_language = label['language']
        # Remember the most recent label as the fallback of last resort.
        default = label
        # An exact language match wins outright.
        if label_language == lang:
            return label
        # Otherwise keep a label from the same language family. The former
        # trailing "language == lang" check was unreachable and is gone.
        if label_language.split('-')[0] == lang.split('-')[0]:
            ret = label
    return default if ret is None else ret
Beispiel #6
0
def find_overlapping(request):
    """Return existing resources that fall within a buffer around a geometry.

    Called via Ajax when a new geometry is created in the Location tab, so
    the client can raise an alert when pre-existing resources are found
    within the buffered zone around the new point/line/polygon.

    Args:
        request: HTTP request whose GET parameter 'geom' holds the geometry,
            read with SRID 4326.

    Returns:
        JSONResponse wrapping a list of dicts with the keys 'id', 'type' and
        'primaryname', one per hit of the 'entity' index search.
    """
    import logging

    geomString = request.GET.get('geom', '')
    geom = GEOSGeometry(geomString, srid=4326)

    # Default to 1 km when settings.METER_RADIUS is missing or falsy; plain
    # attribute access would raise AttributeError for a missing setting.
    mindistance = getattr(settings, 'METER_RADIUS', None) or 1000

    # Buffer in SRID 3857 and convert back to 4326 for the search
    # (presumably so the buffer distance is expressed in metres).
    geom.transform(3857)
    buffered_geom = geom.buffer(mindistance)
    buffered_geom.transform(4326)
    logging.getLogger(__name__).debug(
        'find_overlapping: geom=%s buffered_geom=%s', geom, buffered_geom)

    se = SearchEngineFactory().create()
    query = Query(se)
    boolfilter = Bool()
    geoshape = GeoShape(field='geometries.value',
                        type=buffered_geom.geom_type,
                        coordinates=buffered_geom.coords)
    nested = Nested(path='geometries', query=geoshape)
    boolfilter.must(nested)
    query.add_filter(boolfilter)
    results = query.search(index='entity', doc_type='')

    overlaps = [
        {
            'id': hit['_id'],
            'type': hit['_type'],
            'primaryname': hit['_source']['primaryname']
        }
        for hit in results['hits']['hits']
    ]
    return JSONResponse(overlaps)
Beispiel #7
0
def get_preflabel_from_conceptid(conceptid, lang):
    """Look up the preferred label of ``conceptid`` for language ``lang``.

    Labels of type 'preflabel' are read from the 'concept_labels' index. The
    choice, in descending priority, is:

    1. a label whose language equals ``lang`` (returned immediately);
    2. the last label sharing the part of ``lang`` before the first '-';
    3. the first label whose language equals ``settings.LANGUAGE_CODE``;
    4. the last label returned by the search;
    5. a placeholder dict with empty-string values when nothing was found.

    Args:
        conceptid: id of the concept whose label is wanted.
        lang: requested language code, e.g. 'en-US'.

    Returns:
        The ``_source`` dict of the selected label, or the placeholder dict.
    """
    ret = None
    default = {
        "category": "",
        "conceptid": "",
        "language": "",
        "value": "",
        "type": "",
        "id": ""
    }
    se = SearchEngineFactory().create()
    query = Query(se)
    terms = Terms(field='conceptid', terms=[conceptid])
    match = Match(field='type', query='preflabel', type='phrase')
    query.add_filter(terms)
    query.add_query(match)
    preflabels = query.search(index='concept_labels')['hits']['hits']
    for preflabel in preflabels:
        source = preflabel['_source']
        language = source['language']
        # The latest label seen doubles as the fallback of last resort.
        default = source
        # get the label in the preferred language, otherwise fall back to the
        # language family and then to the default language
        if language == lang:
            return source
        if language.split('-')[0] == lang.split('-')[0]:
            ret = source
        if language == settings.LANGUAGE_CODE and ret is None:
            ret = source
    return default if ret is None else ret
Beispiel #8
0
def get_related_resources(resourceid, lang, limit=1000, start=0, allowedtypes=[], is_anon=False):
    """Return the visible relationships and related resources of a resource.

    Relations naming ``resourceid`` as ``entityid1`` or ``entityid2`` are
    read from the 'resource_relations' index and labelled through
    ``get_preflabel_from_valueid``. The entities at the other end are fetched
    from the 'entity' index and filtered; relationships touching a filtered
    entity are removed from the result as well.

    Args:
        resourceid: id of the resource whose relations are wanted.
        lang: language code handed to ``get_preflabel_from_valueid``.
        limit: ``limit`` passed to the relations ``Query``.
        start: ``start`` passed to the relations ``Query``.
        allowedtypes: values of a document's ``_type`` that may be returned.
            With the default empty list every related resource is filtered
            out. The argument is only read, never mutated.
        is_anon: when True, resources whose 'domains' contain the value
            returned by
            ``get_protection_conceptids(settings.PROTECTION_LEVEL_NODE)``
            are filtered out too.

    Returns:
        dict with the keys 'resource_relationships', 'related_resources' and
        'total' (the number of relationships left after filtering).
    """
    ret = {
        'resource_relationships': [],
        'related_resources': []
    }
    se = SearchEngineFactory().create()

    query = Query(se, limit=limit, start=start)
    query.add_filter(Terms(field='entityid1', terms=resourceid).dsl, operator='or')
    query.add_filter(Terms(field='entityid2', terms=resourceid).dsl, operator='or')
    resource_relations = query.search(index='resource_relations', doc_type="all")

    entityids = set()
    for relation in resource_relations['hits']['hits']:
        source = relation['_source']
        source['preflabel'] = get_preflabel_from_valueid(source['relationshiptype'], lang)
        ret['resource_relationships'].append(source)
        entityids.add(source['entityid1'])
        entityids.add(source['entityid2'])

    # discard() instead of remove(): a missing id must not raise KeyError.
    entityids.discard(resourceid)

    # Ids of related resources that must stay hidden. A set keeps the
    # membership tests in the relationship filter below cheap.
    filtered_ids = set()

    # Without any ids there is nothing to fetch, so skip the entity lookup.
    if entityids:
        # can't figure why passing allowed types to doc_type param doesn't
        # work, so the type filter is carried out on the results instead
        related_resources = se.search(index='entity', doc_type='_all', id=list(entityids))

        if related_resources:
            # The protection concept id does not depend on the resource, so it
            # is resolved at most once, on first need, instead of per document.
            protect_id = None
            protect_id_loaded = False

            for resource in related_resources['docs']:
                entity = resource['_source']
                if resource['_type'] not in allowedtypes:
                    filtered_ids.add(entity['entityid'])
                    continue

                if is_anon:
                    # filter out protected resources if user is anonymous
                    # (this is basically a subset of get_protected_entityids;
                    # the two should probably be combined)
                    if not protect_id_loaded:
                        from search import get_protection_conceptids
                        protect_id = get_protection_conceptids(settings.PROTECTION_LEVEL_NODE)
                        protect_id_loaded = True
                    conceptids = [d['conceptid'] for d in entity['domains']]
                    if protect_id in conceptids:
                        filtered_ids.add(entity['entityid'])
                        continue

                ret['related_resources'].append(entity)

    if filtered_ids:
        # drop every relationship that touches a filtered resource
        ret['resource_relationships'] = [
            rel for rel in ret['resource_relationships']
            if rel['entityid1'] not in filtered_ids and rel['entityid2'] not in filtered_ids
        ]

    ret['total'] = len(ret['resource_relationships'])

    return ret
def get_related_resources(resourceid, lang='en-US', limit=1000, start=0):
    """Return the relations in which ``resourceid`` is ``entityid1``.

    Args:
        resourceid: id matched against the 'entityid1' field.
        lang: accepted for interface compatibility; not used here.
        limit: ``limit`` passed to the relations ``Query``.
        start: ``start`` passed to the relations ``Query``.

    Returns:
        dict with 'resource_relationships' (the ``_source`` of every hit),
        'related_resources' (always an empty list) and 'total' (the hit total
        reported by the search).
    """
    search_engine = SearchEngineFactory().create()

    relations_query = Query(search_engine, limit=limit, start=start)
    entity_filter = Terms(field='entityid1', terms=resourceid)
    relations_query.add_filter(entity_filter.dsl, operator='or')
    found = relations_query.search(index='resource_relations',
                                   doc_type='all')

    ret = {'resource_relationships': [], 'related_resources': []}
    ret['total'] = found['hits']['total']
    ret['resource_relationships'].extend(
        hit['_source'] for hit in found['hits']['hits'])
    return ret
Beispiel #10
0
def get_related_resources(resourceid, lang='en-US', limit=1000, start=0):
    """Fetch the relations whose 'entityid1' matches ``resourceid``.

    Args:
        resourceid: id matched against the 'entityid1' field.
        lang: accepted for interface compatibility; not used here.
        limit: ``limit`` passed to the relations ``Query``.
        start: ``start`` passed to the relations ``Query``.

    Returns:
        dict with 'resource_relationships' (the ``_source`` of every hit),
        'related_resources' (always an empty list) and 'total' (the hit total
        reported by the search).
    """
    engine = SearchEngineFactory().create()

    relation_query = Query(engine, limit=limit, start=start)
    relation_query.add_filter(Terms(field='entityid1', terms=resourceid).dsl, operator='or')
    response = relation_query.search(index='resource_relations', doc_type='all')

    total = response['hits']['total']
    relationships = [hit['_source'] for hit in response['hits']['hits']]
    return {
        'resource_relationships': relationships,
        'related_resources': [],
        'total': total
    }
Beispiel #11
0
def get_preflabel_from_conceptid(conceptid, lang):
    """Return the preferred label of a concept, favouring language ``lang``.

    Labels of type "preflabel" are read from the "concept_labels" index and
    one is chosen by descending priority:

    1. a label whose language equals ``lang`` (returned immediately);
    2. the last label sharing the part of ``lang`` before the first "-";
    3. the first label whose language equals ``settings.LANGUAGE_CODE``;
    4. the last label returned by the search;
    5. a placeholder dict with empty-string values when nothing was found.

    Args:
        conceptid: id of the concept whose label is wanted.
        lang: requested language code, e.g. "en-US".

    Returns:
        The ``_source`` dict of the selected label, or the placeholder dict.
    """
    ret = None
    default = {"category": "", "conceptid": "", "language": "", "value": "", "type": "", "id": ""}
    se = SearchEngineFactory().create()
    query = Query(se)
    terms = Terms(field="conceptid", terms=[conceptid])
    match = Match(field="type", query="preflabel", type="phrase")
    query.add_filter(terms)
    query.add_query(match)
    preflabels = query.search(index="concept_labels")["hits"]["hits"]
    for preflabel in preflabels:
        candidate = preflabel["_source"]
        candidate_language = candidate["language"]
        # The latest candidate doubles as the fallback of last resort.
        default = candidate
        # An exact language match wins outright.
        if candidate_language == lang:
            return candidate
        # Same language family (e.g. "en-GB" for "en-US") beats the site default.
        if candidate_language.split("-")[0] == lang.split("-")[0]:
            ret = candidate
        # The site default language is used only while nothing better is known.
        if candidate_language == settings.LANGUAGE_CODE and ret is None:
            ret = candidate
    return default if ret is None else ret
Beispiel #12
0
def get_related_resource_ids(resourceids, lang, limit=1000, start=0):
    """Return the ids found at the other end of relations of ``resourceids``.

    Args:
        resourceids: ids matched against 'entityid1' and 'entityid2' of the
            'resource_relations' index.
        lang: accepted for interface compatibility; not used here.
        limit: ``limit`` passed to the relations ``Query``.
        start: ``start`` passed to the relations ``Query``.

    Returns:
        set containing, for every relation hit, the id opposite to each end
        that is contained in ``resourceids``.
    """
    search_engine = SearchEngineFactory().create()
    relations_query = Query(search_engine, limit=limit, start=start)
    for field_name in ('entityid1', 'entityid2'):
        relations_query.add_filter(Terms(field=field_name, terms=resourceids).dsl, operator='or')
    response = relations_query.search(index='resource_relations', doc_type='all')

    related_ids = set()
    for hit in response['hits']['hits']:
        source = hit['_source']
        first_id = source['entityid1']
        first_is_known = first_id in resourceids
        second_id = source['entityid2']
        second_is_known = second_id in resourceids

        # for each end that belongs to the original ids, keep the other end
        if first_is_known:
            related_ids.add(second_id)
        if second_is_known:
            related_ids.add(first_id)

    return related_ids
Beispiel #13
0
def get_related_resource_ids(resourceids, lang, limit=1000, start=0):
    """Collect the ids related to any of ``resourceids``.

    Args:
        resourceids: ids matched against 'entityid1' and 'entityid2' of the
            'resource_relations' index.
        lang: accepted for interface compatibility; not used here.
        limit: ``limit`` passed to the relations ``Query``.
        start: ``start`` passed to the relations ``Query``.

    Returns:
        set containing, for every relation hit, the id opposite to each end
        that is contained in ``resourceids``.
    """
    engine = SearchEngineFactory().create()

    relation_query = Query(engine, limit=limit, start=start)
    from_filter = Terms(field='entityid1', terms=resourceids)
    relation_query.add_filter(from_filter.dsl, operator='or')
    to_filter = Terms(field='entityid2', terms=resourceids)
    relation_query.add_filter(to_filter.dsl, operator='or')

    hits = relation_query.search(index='resource_relations',
                                 doc_type='all')['hits']['hits']

    collected = set()
    for hit in hits:
        relation = hit['_source']
        # decide for both ends first, then record the opposite end of each
        # end that is one of the original ids
        from_matches = relation['entityid1'] in resourceids
        to_matches = relation['entityid2'] in resourceids
        if from_matches:
            collected.add(relation['entityid2'])
        if to_matches:
            collected.add(relation['entityid1'])

    return collected
Beispiel #14
0
def build_search_results_dsl(request):
#    Results are sorted ascendingly by the value of SITE_ID.E42, which is displayed as primary name of Heritage Resources. 
#    Must go back to this method once new Automatic Resource ID has been fully developed (AZ 10/08/16) Update 06/09/16: EAMENA_ID.E42 now used as sorting criterion.

    sorting = {
		"child_entities.label":  {
			"order" : "asc",
			"nested_path": "child_entities",
			"nested_filter": {
				"term": {"child_entities.entitytypeid" : "EAMENA_ID.E42"}
			}
		}
	}
    
    term_filter = request.GET.get('termFilter', '')
    
    
    spatial_filter = JSONDeserializer().deserialize(request.GET.get('spatialFilter', None)) 
    export = request.GET.get('export', None)
    page = 1 if request.GET.get('page') == '' else int(request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(request.GET.get('temporalFilter', None))
    boolean_search = request.GET.get('booleanSearch', '')
    filter_and_or = JSONDeserializer().deserialize(request.GET.get('termFilterAndOr', ''))
    filter_grouping = JSONDeserializer().deserialize(request.GET.get('termFilterGroup', ''))
    
    filter_combine_flags = JSONDeserializer().deserialize(request.GET.get('termFilterCombineWithPrev', ''))
    #Ignore first entry as it is a dummy
    filter_combine_flags = filter_combine_flags[1:]
    # filter_combine_flags = [False, True, False, False, False]
    
    # filter_groups = JSONDeserializer().deserialize(request.GET.get('termFilterGroups', ''))
    # Not here yet, so put in some bogus data
    # filter_groups = [
    #     'NAME.E41',
    #     'NAME.E41',
    #     'DISTURBANCE_STATE.E3',
    #     'THREAT_STATE.E3'
    # ]
    
    se = SearchEngineFactory().create()

    if export != None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE  
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE
    
    query = Query(se, start=limit*int(page-1), limit=limit)
    boolquery = Bool()
    boolfilter = Bool()
    is_empty_temporal_filter = True

    # store each search term in an initially. These will be combined based on the global and/or and the optional groupings
    terms_queries = [];

    # logging.warning("-------QUERY-------")

    if term_filter != '' or not is_empty_temporal_filter:
        for index, select_box in enumerate(JSONDeserializer().deserialize(term_filter)):
            selectbox_boolfilter = Bool()
            
            groupid = filter_grouping[index]
            if not groupid == 'No group':
                # build a nested query against the nested_entities

                # build a nested query for each resource type
                for resourcetype in settings.RESOURCE_TYPE_CONFIGS().keys():
                    # trace the path from each term to the group root
                    term_paths = []
                    for term in select_box:

                        # trace path from group root to this term
                        if term['type'] == 'concept':

                            # get all the parent concepts for this value i.e. the field
                            concept_relations = models.ConceptRelations.objects.filter(conceptidto=term['value'], relationtype="member")
                            for relation in concept_relations:
                                term_parent_concept = models.Concepts.objects.get(conceptid=relation.conceptidfrom)

                                # get the steps from the root to that concept
                                if term_parent_concept.nodetype.nodetype == "Collection":
                                    term_schema = Entity.get_mapping_schema_to(term_parent_concept.legacyoid)
                                elif term_parent_concept.nodetype.nodetype == 'Concept':
                                    # need to get at the parent until we reach the root collection. concepts are arranged hierarchically
                                    parent_relations_to = models.ConceptRelations.objects.filter(conceptidto=term_parent_concept.conceptid, relationtype='member')
                                    grandparent = models.Concepts.objects.filter(conceptid=parent_relations_to[0].conceptidfrom)
                                    term_schema = Entity.get_mapping_schema_to(grandparent[0].legacyoid)

                                #this path begins at the root, and ends up at the node in question
                                if resourcetype in term_schema:
                                    term_path = term_schema[resourcetype]['steps']

                                    term_paths.append({
                                        'term': term,
                                        'path': term_path
                                    })
                                    break

                        elif term['type'] == 'term':

                            concept = models.Concepts.objects.get(conceptid=term['context'])
                            term_schema = Entity.get_mapping_schema_to(concept.legacyoid)
                            if resourcetype in term_schema:
                                term_path = term_schema[resourcetype]['steps']

                                term_paths.append({
                                    'term': term,
                                    'path': term_path
                                })

                        elif term['type'] == 'string':
                            term_schema = Entity.get_mapping_schema_to(groupid)
                            if resourcetype in term_schema:
                                term_path = term_schema[resourcetype]['steps']

                                term_paths.append({
                                    'term': term,
                                    'path': term_path
                                })

                    if 'year_min_max' in temporal_filter[index] and len(temporal_filter[index]['year_min_max']) == 2:
                        start_date = date(temporal_filter[index]['year_min_max'][0], 1, 1)
                        end_date = date(temporal_filter[index]['year_min_max'][1], 12, 31)
                        if start_date:
                            start_date = start_date.isoformat()
                        if end_date:
                            end_date = end_date.isoformat()

                        if 'inverted' not in temporal_filter[index]:
                            inverted_temporal_filter = False
                        else:
                            if temporal_filter[index]['inverted']:
                                inverted_temporal_filter = True
                            else:
                                inverted_temporal_filter = False

                        term_paths.append({
                            'term': {
                                'date_operator': '3',
                                'start_date': start_date,
                                'end_date': end_date,
                                'type': 'date',
                                'inverted': inverted_temporal_filter
                            },
                            'path': term_path
                        })


                    if 'filters' in temporal_filter[index]:
                        term_schema = Entity.get_mapping_schema_to(groupid)
                        if resourcetype in term_schema:
                            term_path = term_schema[resourcetype]['steps']

                            for temporal_filter_item in temporal_filter[index]['filters']:
                                date_type = ''
                                searchdate = ''
                                date_operator = ''
                                for node in temporal_filter_item['nodes']:
                                    if node['entitytypeid'] == 'DATE_COMPARISON_OPERATOR.E55':
                                        date_operator = node['value']
                                    elif node['entitytypeid'] == 'date':
                                        searchdate = node['value']
                                    else:
                                        date_type = node['value']

                                date_value = datetime.strptime(searchdate, '%Y-%m-%d').isoformat()
                                if 'inverted' not in temporal_filter[index]:
                                    inverted_temporal_filter = False
                                else:
                                    if temporal_filter[index]['inverted']:
                                        inverted_temporal_filter = True
                                    else:
                                        inverted_temporal_filter = False

                                term_paths.append({
                                    'term': {
                                        'date_operator': date_operator,
                                        'date_value': date_value,
                                        'type': 'date',
                                        'inverted': inverted_temporal_filter
                                    },
                                    'path': term_path
                                })

                    # combine the traced path to build a nested query
                    group_query = nested_query_from_pathed_values(term_paths, 'nested_entity.child_entities')


                    # add nested query to overall query
                    selectbox_boolfilter.should(group_query)
                
                # logging.warning("BOX QUERY - %s", JSONSerializer().serialize(selectbox_boolfilter, indent=2))

            else:    
                for term in select_box:
                    
                    if term['type'] == 'term':
                        entitytype = models.EntityTypes.objects.get(conceptid_id=term['context'])
                        boolfilter_nested = Bool()
                        boolfilter_nested.must(Terms(field='child_entities.entitytypeid', terms=[entitytype.pk]))
                        boolfilter_nested.must(Match(field='child_entities.value', query=term['value'], type='phrase'))
                        nested = Nested(path='child_entities', query=boolfilter_nested)
                        if filter_and_or[index] == 'or':
                            if not term['inverted']:
                                selectbox_boolfilter.should(nested)
                        else:
                            if term['inverted']:
                                selectbox_boolfilter.must_not(nested)
                            else:    
                                selectbox_boolfilter.must(nested)
                                
                    elif term['type'] == 'concept':
                        concept_ids = _get_child_concepts(term['value'])
                        terms = Terms(field='domains.conceptid', terms=concept_ids)
                        nested = Nested(path='domains', query=terms)
                        if filter_and_or[index] == 'or':
                            if not term['inverted']:
                                    selectbox_boolfilter.should(nested)
                        else:
                            if term['inverted']:
                                selectbox_boolfilter.must_not(nested)
                            else:
                                selectbox_boolfilter.must(nested)
                                
                    elif term['type'] == 'string':
                        boolquery2 = Bool() #This bool contains the subset of nested string queries on both domains and child_entities paths
                        boolfilter_folded = Bool() #This bool searches by string in child_entities, where free text strings get indexed
                        boolfilter_folded2 = Bool() #This bool searches by string in the domains path,where controlled vocabulary concepts get indexed
                        boolfilter_folded.should(Match(field='child_entities.value', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        boolfilter_folded.should(Match(field='child_entities.value.folded', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        boolfilter_folded.should(Match(field='child_entities.value.folded', query=term['value'], fuzziness='AUTO', operator='and'))
                        nested = Nested(path='child_entities', query=boolfilter_folded)
                        boolfilter_folded2.should(Match(field='domains.label', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        boolfilter_folded2.should(Match(field='domains.label.folded', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        boolfilter_folded2.should(Match(field='domains.label.folded', query=term['value'], fuzziness='AUTO', operator='and'))
                        nested2 = Nested(path='domains', query=boolfilter_folded2)
                        boolquery2.should(nested)
                        boolquery2.should(nested2)
                        if filter_and_or[index] == 'or':
                            if not term['inverted']:
                                # use boolfilter here instead of boolquery because boolquery
                                # can't be combined with other boolfilters using boolean OR
                                selectbox_boolfilter.should(boolquery2)
                        else:
                            if term['inverted']:
                                selectbox_boolfilter.must_not(boolquery2)
                            else:    
                                selectbox_boolfilter.must(boolquery2)
                            
                if 'year_min_max' in temporal_filter[index] and len(temporal_filter[index]['year_min_max']) == 2:
                    start_date = date(temporal_filter[index]['year_min_max'][0], 1, 1)
                    end_date = date(temporal_filter[index]['year_min_max'][1], 12, 31)
                    if start_date:
                        start_date = start_date.isoformat()
                    if end_date:
                        end_date = end_date.isoformat()
                    range = Range(field='dates.value', gte=start_date, lte=end_date)
                    nested = Nested(path='dates', query=range)
            
                    if 'inverted' not in temporal_filter[index]:
                        temporal_filter[index]['inverted'] = False

                    if temporal_filter[index]['inverted']:
                        selectbox_boolfilter.must_not(nested)
                    else:
                        selectbox_boolfilter.must(nested)
                        
                if 'filters' in temporal_filter[index]:
                    for temporal_filter_item in temporal_filter[index]['filters']:
                        date_type = ''
                        searchdate = ''
                        date_operator = ''
                        for node in temporal_filter_item['nodes']:
                            if node['entitytypeid'] == 'DATE_COMPARISON_OPERATOR.E55':
                                date_operator = node['value']
                            elif node['entitytypeid'] == 'date':
                                searchdate = node['value']
                            else:
                                date_type = node['value']


                        date_value = datetime.strptime(searchdate, '%Y-%m-%d').isoformat()

                        if date_operator == '1': # equals query
                            range = Range(field='dates.value', gte=date_value, lte=date_value)
                        elif date_operator == '0': # greater than query 
                            range = Range(field='dates.value', lt=date_value)
                        elif date_operator == '2': # less than query
                            range = Range(field='dates.value', gt=date_value)
                        
                        nested = Nested(path='dates', query=range)
                        if 'inverted' not in temporal_filter[index]:
                            temporal_filter[index]['inverted'] = False

                        if temporal_filter[index]['inverted']:
                            selectbox_boolfilter.must_not(nested)
                        else:
                            selectbox_boolfilter.must(nested)


            terms_queries.append(selectbox_boolfilter)
            # if not selectbox_boolfilter.empty:
            #     if boolean_search == 'or':
            #         boolfilter.should(selectbox_boolfilter)
            #     else:
            #         boolfilter.must(selectbox_boolfilter)
        
        # We now have individual query terms for each of the search components. Combine into one group now
        # Start by building a an array of groups which will be combined according to the global And/Or
        # Queries within one of these groups will be combined by the complement of the global And/Or
        # We may end up with [ [A,B], [C], [D,E] ], which would translate to either:
        #    (A || B) && C && (D || E)
        #       or
        #    (A && B) || C || (D && E)
        # for global AND or OR respectively
        
        # logging.warning("TERMS QUERIES %s", terms_queries)
        
        bool_components = [];
        
        for i, term_query in enumerate(terms_queries):
            if i is 0:
                bool_components.append([term_query])
            else:
                should_group_with_previous = filter_combine_flags[i-1]
                if should_group_with_previous:
                    bool_components[-1].append(term_query)
                else:
                    bool_components.append([term_query])
            
        # logging.warning("BOOL COMPONENTS %s", bool_components)
        # Now build the ES queries
        for bool_component in bool_components:
            if len(bool_component) is 1:
                # just combine this on its own
                q = bool_component[0]
            else:
                q = Bool()
                for sub_component in bool_component:
                    if boolean_search == 'or':
                        #apply the OPPOSITE of the global boolean operator
                        q.must(sub_component)
                    else:
                        q.should(sub_component)
                        
            # combine to the overall query according to the global boolean operator
            if boolean_search == 'or':
                boolfilter.should(q)
            else:
                boolfilter.must(q)

    if 'geometry' in spatial_filter and 'type' in spatial_filter['geometry'] and spatial_filter['geometry']['type'] != '':
        geojson = spatial_filter['geometry']
        if geojson['type'] == 'bbox':
            coordinates = [[geojson['coordinates'][0],geojson['coordinates'][3]], [geojson['coordinates'][2],geojson['coordinates'][1]]]
            geoshape = GeoShape(field='geometries.value', type='envelope', coordinates=coordinates )
            nested = Nested(path='geometries', query=geoshape)
        else:
            buffer = spatial_filter['buffer']
            geojson = JSONDeserializer().deserialize(_buffer(geojson,buffer['width'],buffer['unit']).json)
            geoshape = GeoShape(field='geometries.value', type=geojson['type'], coordinates=geojson['coordinates'] )
            nested = Nested(path='geometries', query=geoshape)

        if 'inverted' not in spatial_filter:
            spatial_filter['inverted'] = False

        if spatial_filter['inverted']:
            boolfilter.must_not(nested)
        else:
            boolfilter.must(nested)

    if not boolquery.empty:
        query.add_query(boolquery)

    if not boolfilter.empty:
        query.add_filter(boolfilter)
    
#  Sorting criterion added to query (AZ 10/08/16)
    query.dsl.update({'sort': sorting})
    # logging.warning("-=-==-=-===-=--=-==-=-===-=- query: -=-==-=-===-=--=-==-=-===-=-> %s", query)

    return query
Beispiel #15
0
def build_search_results_dsl(request):
    """Build the search-engine query for a search results request.

    The following GET parameters are read from ``request``:

    * ``termFilter`` -- JSON list of term dicts. Each dict carries ``type``
      (``'term'``, ``'concept'`` or ``'string'``), ``value`` and ``inverted``.
    * ``mapFilter`` -- JSON object; only the first entry of ``features`` is
      used. Its ``properties`` may carry ``buffer`` and ``inverted``.
    * ``temporalFilter`` -- JSON object; applied only when both ``fromDate``
      and ``toDate`` keys are present. May carry ``dateNodeId`` and
      ``inverted``.
    * ``export`` -- when present, the export page size is used.
    * ``page`` -- page number; an empty or missing value means page 1.

    Returns the ``Query`` object, with ``boolquery`` attached as a query and
    ``boolfilter`` attached as a filter whenever they are not empty.
    """
    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(
        request.GET.get('mapFilter', '{}'))
    export = request.GET.get('export', None)
    page = 1 if request.GET.get('page') == '' else int(
        request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(
        request.GET.get('temporalFilter', '{}'))

    se = SearchEngineFactory().create()

    if export is not None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit * int(page - 1), limit=limit)
    boolquery = Bool()
    boolfilter = Bool()

    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            if term['type'] == 'term':
                # Exact phrase match against the indexed strings.
                phrase_filter = Bool()
                phrase_filter.must(
                    Match(field='strings', query=term['value'], type='phrase'))
                if term['inverted']:
                    boolfilter.must_not(phrase_filter)
                else:
                    boolfilter.must(phrase_filter)
            elif term['type'] == 'concept':
                concept_ids = _get_child_concepts(term['value'])
                conceptid_filter = Terms(field='domains.conceptid',
                                         terms=concept_ids)
                if term['inverted']:
                    boolfilter.must_not(conceptid_filter)
                else:
                    boolfilter.must(conceptid_filter)
            elif term['type'] == 'string':
                # Prefix match on either the raw or the folded string field.
                string_filter = Bool()
                string_filter.should(
                    Match(field='strings',
                          query=term['value'],
                          type='phrase_prefix'))
                string_filter.should(
                    Match(field='strings.folded',
                          query=term['value'],
                          type='phrase_prefix'))
                if term['inverted']:
                    boolfilter.must_not(string_filter)
                else:
                    boolfilter.must(string_filter)

    if 'features' in spatial_filter:
        if len(spatial_filter['features']) > 0:
            # Only the first feature of the map filter is considered.
            first_feature = spatial_filter['features'][0]
            feature_geom = first_feature['geometry']
            feature_properties = first_feature['properties']
            buffer_spec = {'width': 0, 'unit': 'ft'}
            if 'buffer' in feature_properties:
                buffer_spec = feature_properties['buffer']
            feature_geom = JSONDeserializer().deserialize(
                _buffer(feature_geom, buffer_spec['width'],
                        buffer_spec['unit']).json)
            geoshape = GeoShape(field='geometries.features.geometry',
                                type=feature_geom['type'],
                                coordinates=feature_geom['coordinates'])

            invert_spatial_search = False
            if 'inverted' in feature_properties:
                invert_spatial_search = feature_properties['inverted']

            # Explicit comparison kept on purpose: a truthy non-boolean value
            # such as the string 'false' must not invert the search.
            if invert_spatial_search == True:  # noqa: E712
                boolfilter.must_not(geoshape)
            else:
                boolfilter.must(geoshape)

    if 'fromDate' in temporal_filter and 'toDate' in temporal_filter:
        # Best effort: a bound that cannot be parsed is left open (None).
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        try:
            start_date = parser.parse(temporal_filter['fromDate']).isoformat()
        except Exception:
            start_date = None
        try:
            end_date = parser.parse(temporal_filter['toDate']).isoformat()
        except Exception:
            end_date = None

        if 'dateNodeId' in temporal_filter and temporal_filter[
                'dateNodeId'] != '':
            # Restrict the range to one specific date node inside the tiles.
            date_range = Range(field='tiles.data.%s' %
                               (temporal_filter['dateNodeId']),
                               gte=start_date,
                               lte=end_date)
            time_query_dsl = Nested(path='tiles', query=date_range)
        else:
            time_query_dsl = Range(field='dates', gte=start_date, lte=end_date)

        if temporal_filter.get('inverted', False):
            boolfilter.must_not(time_query_dsl)
        else:
            boolfilter.must(time_query_dsl)

    if not boolquery.empty:
        query.add_query(boolquery)

    if not boolfilter.empty:
        query.add_filter(boolfilter)

    return query
Beispiel #16
0
def build_search_results_dsl(request):
    """Build the search-engine query for a search results request.

    The following GET parameters are read from ``request``:

    * ``termFilter`` -- JSON list of term dicts. Each dict carries ``type``
      (``'term'``, ``'concept'`` or ``'string'``), ``value``, ``inverted``
      and, for ``'term'``, ``context``.
    * ``spatialFilter`` -- JSON object with ``geometry`` and, for non-bbox
      geometries, ``buffer``; may carry ``inverted``.
    * ``temporalFilter`` -- JSON object; applied when ``year_min_max`` holds
      exactly two entries. May carry ``inverted``.
    * ``export`` -- when present, the export page size is used.
    * ``page`` -- page number; an empty or missing value means page 1.

    Returns the ``Query`` object, with ``boolquery`` attached as a query and
    ``boolfilter`` attached as a filter whenever they are not empty.
    """
    term_filter = request.GET.get('termFilter', '')
    # Default to an empty JSON object so that a request without these
    # parameters yields "no filter" instead of handing None to the
    # deserializer and to the ``in`` checks below.
    spatial_filter = JSONDeserializer().deserialize(request.GET.get('spatialFilter', '{}'))
    export = request.GET.get('export', None)
    page = 1 if request.GET.get('page') == '' else int(request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(request.GET.get('temporalFilter', '{}'))

    se = SearchEngineFactory().create()

    if export is not None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit*int(page-1), limit=limit)
    boolquery = Bool()
    boolfilter = Bool()

    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            if term['type'] == 'term':
                # Phrase match restricted to child entities of the entity
                # type looked up from the term's context concept.
                entitytype = models.EntityTypes.objects.get(conceptid_id=term['context'])
                boolfilter_nested = Bool()
                boolfilter_nested.must(Terms(field='child_entities.entitytypeid', terms=[entitytype.pk]))
                boolfilter_nested.must(Match(field='child_entities.value', query=term['value'], type='phrase'))
                nested = Nested(path='child_entities', query=boolfilter_nested)
                if term['inverted']:
                    boolfilter.must_not(nested)
                else:
                    boolfilter.must(nested)
            elif term['type'] == 'concept':
                concept_ids = _get_child_concepts(term['value'])
                concept_terms = Terms(field='domains.conceptid', terms=concept_ids)
                nested = Nested(path='domains', query=concept_terms)
                if term['inverted']:
                    boolfilter.must_not(nested)
                else:
                    boolfilter.must(nested)
            elif term['type'] == 'string':
                # Free-text prefix match on the raw or folded value. Unlike
                # the other term types this is added to ``boolquery``, not
                # ``boolfilter``.
                boolfilter_folded = Bool()
                boolfilter_folded.should(Match(field='child_entities.value', query=term['value'], type='phrase_prefix'))
                boolfilter_folded.should(Match(field='child_entities.value.folded', query=term['value'], type='phrase_prefix'))
                nested = Nested(path='child_entities', query=boolfilter_folded)
                if term['inverted']:
                    boolquery.must_not(nested)
                else:
                    boolquery.must(nested)

    if 'geometry' in spatial_filter and 'type' in spatial_filter['geometry'] and spatial_filter['geometry']['type'] != '':
        geojson = spatial_filter['geometry']
        if geojson['type'] == 'bbox':
            # Reorder the four bbox values into the two corner points of an
            # envelope: [[c0, c3], [c2, c1]].
            bbox = geojson['coordinates']
            coordinates = [[bbox[0], bbox[3]], [bbox[2], bbox[1]]]
            geoshape = GeoShape(field='geometries.value', type='envelope', coordinates=coordinates)
        else:
            buffer_spec = spatial_filter['buffer']
            geojson = JSONDeserializer().deserialize(_buffer(geojson, buffer_spec['width'], buffer_spec['unit']).json)
            geoshape = GeoShape(field='geometries.value', type=geojson['type'], coordinates=geojson['coordinates'])
        nested = Nested(path='geometries', query=geoshape)

        if spatial_filter.get('inverted', False):
            boolfilter.must_not(nested)
        else:
            boolfilter.must(nested)

    if 'year_min_max' in temporal_filter and len(temporal_filter['year_min_max']) == 2:
        # Expand the two years to the full span 1 January .. 31 December.
        first_year, last_year = temporal_filter['year_min_max']
        start_date = date(first_year, 1, 1).isoformat()
        end_date = date(last_year, 12, 31).isoformat()
        date_range = Range(field='dates.value', gte=start_date, lte=end_date)
        nested = Nested(path='dates', query=date_range)

        if temporal_filter.get('inverted', False):
            boolfilter.must_not(nested)
        else:
            boolfilter.must(nested)

    if not boolquery.empty:
        query.add_query(boolquery)

    if not boolfilter.empty:
        query.add_filter(boolfilter)

    return query
Beispiel #17
0
def build_search_results_dsl(request):
    """Build the sorted, grouped search query for a search results request.

    The following GET parameters are read from ``request``:

    * ``termFilter`` -- JSON list of "select boxes"; each box is a list of
      term dicts with ``type`` (``'concept'``, ``'term'`` or ``'string'``),
      ``value``, ``inverted`` and, for ``'term'``, ``context``.
    * ``termFilterGroup`` -- JSON list with one group id per select box; the
      value ``'No group'`` selects the ungrouped code path.
    * ``termFilterAndOr`` -- JSON list with one operator per select box; only
      compared against ``'or'`` (ungrouped boxes only).
    * ``termFilterCombineWithPrev`` -- JSON list of flags telling whether a
      box is grouped with the previous one; the first entry is a dummy.
    * ``temporalFilter`` -- JSON list indexed like ``termFilter``; entries may
      carry ``year_min_max``, ``filters`` and ``inverted``.
    * ``spatialFilter`` -- JSON object with ``geometry`` and, for non-bbox
      geometries, ``buffer``; may carry ``inverted``.
    * ``booleanSearch`` -- global operator; ``'or'`` combines the groups with
      ``should``, any other value with ``must``.
    * ``export`` -- when present, the export page size is used.
    * ``page`` -- page number; an empty or missing value means page 1.

    Returns the ``Query`` object with the combined filter attached (when not
    empty) and a ``sort`` entry set on its DSL.
    """
    # Results are sorted ascendingly by the label of the EAMENA_ID.E42 child
    # entity. (AZ 10/08/16; updated 06/09/16 to use EAMENA_ID.E42 instead of
    # SITE_ID.E42.) To be revisited once the automatic resource ID exists.
    sorting = {
        "child_entities.label": {
            "order": "asc",
            "nested_path": "child_entities",
            "nested_filter": {
                "term": {"child_entities.entitytypeid": "EAMENA_ID.E42"}
            }
        }
    }

    term_filter = request.GET.get('termFilter', '')

    spatial_filter = JSONDeserializer().deserialize(request.GET.get('spatialFilter', None))
    export = request.GET.get('export', None)
    page = 1 if request.GET.get('page') == '' else int(request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(request.GET.get('temporalFilter', None))
    boolean_search = request.GET.get('booleanSearch', '')
    filter_and_or = JSONDeserializer().deserialize(request.GET.get('termFilterAndOr', ''))
    filter_grouping = JSONDeserializer().deserialize(request.GET.get('termFilterGroup', ''))

    filter_combine_flags = JSONDeserializer().deserialize(request.GET.get('termFilterCombineWithPrev', ''))
    # Ignore the first entry as it is a dummy.
    filter_combine_flags = filter_combine_flags[1:]

    se = SearchEngineFactory().create()

    if export is not None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit*int(page-1), limit=limit)
    boolquery = Bool()
    boolfilter = Bool()

    # One Bool per select box. These are combined afterwards according to the
    # global and/or operator and the optional groupings.
    terms_queries = []

    if term_filter != '':
        for index, select_box in enumerate(JSONDeserializer().deserialize(term_filter)):
            selectbox_boolfilter = Bool()

            groupid = filter_grouping[index]
            if groupid != 'No group':
                # Grouped box: build a nested query against the nested
                # entities by tracing the path from the group root to each
                # term.
                term_paths = []
                for term in select_box:
                    if term['type'] == 'concept':
                        # Get the parent concept for this value, i.e. the field.
                        term_parent_concept = Concept.get_parent_concept(term['value'])

                        # Get the steps from the root to that concept.
                        if term_parent_concept.nodetype.nodetype == "Collection":
                            term_schema = Entity.get_mapping_schema_to(term_parent_concept.legacyoid)
                        elif term_parent_concept.nodetype.nodetype == 'Concept':
                            # Concepts are arranged hierarchically: step up to
                            # the parent to reach the collection.
                            parent_relations_to = models.ConceptRelations.objects.filter(conceptidto=term_parent_concept.conceptid, relationtype='member')
                            grandparent = models.Concepts.objects.filter(conceptid=parent_relations_to[0].conceptidfrom)
                            term_schema = Entity.get_mapping_schema_to(grandparent[0].legacyoid)
                    elif term['type'] == 'term':
                        concept = models.Concepts.objects.get(conceptid=term['context'])
                        term_schema = Entity.get_mapping_schema_to(concept.legacyoid)
                    elif term['type'] == 'string':
                        term_schema = Entity.get_mapping_schema_to(groupid)
                    else:
                        # Unknown term types contribute nothing.
                        continue

                    # This path begins at the root and ends at the node in question.
                    term_path = term_schema['HERITAGE_RESOURCE_GROUP.E27']['steps']
                    term_paths.append({
                        'term': term,
                        'path': term_path
                    })

                if 'year_min_max' in temporal_filter[index] and len(temporal_filter[index]['year_min_max']) == 2:
                    # Expand the two years to 1 January .. 31 December.
                    start_date = date(temporal_filter[index]['year_min_max'][0], 1, 1).isoformat()
                    end_date = date(temporal_filter[index]['year_min_max'][1], 12, 31).isoformat()
                    inverted_temporal_filter = bool(temporal_filter[index].get('inverted', False))

                    # NOTE(review): ``term_path`` here is whatever the term
                    # loop above assigned last (or is unbound when no term was
                    # processed yet) -- confirm whether the group path, as
                    # used for 'filters' below, was intended.
                    term_paths.append({
                        'term': {
                            'date_operator': '3',
                            'start_date': start_date,
                            'end_date': end_date,
                            'type': 'date',
                            'inverted': inverted_temporal_filter
                        },
                        'path': term_path
                    })

                if 'filters' in temporal_filter[index]:
                    term_schema = Entity.get_mapping_schema_to(groupid)
                    term_path = term_schema['HERITAGE_RESOURCE_GROUP.E27']['steps']

                    for temporal_filter_item in temporal_filter[index]['filters']:
                        date_type = ''  # collected but not used by the query
                        searchdate = ''
                        date_operator = ''
                        for node in temporal_filter_item['nodes']:
                            if node['entitytypeid'] == 'DATE_COMPARISON_OPERATOR.E55':
                                date_operator = node['value']
                            elif node['entitytypeid'] == 'date':
                                searchdate = node['value']
                            else:
                                date_type = node['value']

                        date_value = datetime.strptime(searchdate, '%Y-%m-%d').isoformat()
                        inverted_temporal_filter = bool(temporal_filter[index].get('inverted', False))

                        term_paths.append({
                            'term': {
                                'date_operator': date_operator,
                                'date_value': date_value,
                                'type': 'date',
                                'inverted': inverted_temporal_filter
                            },
                            'path': term_path
                        })

                # Combine the traced paths into one nested query and add it
                # to this box's filter.
                group_query = nested_query_from_pathed_values(term_paths, 'nested_entity.child_entities')
                selectbox_boolfilter.must(group_query)

            else:
                # Ungrouped box. With a per-box 'or', non-inverted terms are
                # added as ``should`` and inverted terms are dropped;
                # otherwise terms are added as ``must`` / ``must_not``.
                for term in select_box:

                    if term['type'] == 'term':
                        entitytype = models.EntityTypes.objects.get(conceptid_id=term['context'])
                        boolfilter_nested = Bool()
                        boolfilter_nested.must(Terms(field='child_entities.entitytypeid', terms=[entitytype.pk]))
                        boolfilter_nested.must(Match(field='child_entities.value', query=term['value'], type='phrase'))
                        nested = Nested(path='child_entities', query=boolfilter_nested)
                        if filter_and_or[index] == 'or':
                            if not term['inverted']:
                                selectbox_boolfilter.should(nested)
                        else:
                            if term['inverted']:
                                selectbox_boolfilter.must_not(nested)
                            else:
                                selectbox_boolfilter.must(nested)

                    elif term['type'] == 'concept':
                        concept_ids = _get_child_concepts(term['value'])
                        terms = Terms(field='domains.conceptid', terms=concept_ids)
                        nested = Nested(path='domains', query=terms)
                        if filter_and_or[index] == 'or':
                            if not term['inverted']:
                                selectbox_boolfilter.should(nested)
                        else:
                            if term['inverted']:
                                selectbox_boolfilter.must_not(nested)
                            else:
                                selectbox_boolfilter.must(nested)

                    elif term['type'] == 'string':
                        # boolquery2 holds the nested string queries on both
                        # the child_entities path (free text) and the domains
                        # path (controlled vocabulary labels).
                        boolquery2 = Bool()
                        boolfilter_folded = Bool()
                        boolfilter_folded2 = Bool()
                        boolfilter_folded.should(Match(field='child_entities.value', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        boolfilter_folded.should(Match(field='child_entities.value.folded', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        boolfilter_folded.should(Match(field='child_entities.value.folded', query=term['value'], fuzziness='AUTO', operator='and'))
                        nested = Nested(path='child_entities', query=boolfilter_folded)
                        boolfilter_folded2.should(Match(field='domains.label', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        boolfilter_folded2.should(Match(field='domains.label.folded', query=term['value'], type='phrase_prefix', fuzziness='AUTO', operator='and'))
                        boolfilter_folded2.should(Match(field='domains.label.folded', query=term['value'], fuzziness='AUTO', operator='and'))
                        nested2 = Nested(path='domains', query=boolfilter_folded2)
                        boolquery2.should(nested)
                        boolquery2.should(nested2)
                        if filter_and_or[index] == 'or':
                            if not term['inverted']:
                                # Use the box filter here instead of boolquery
                                # because boolquery can't be combined with
                                # other boolfilters using boolean OR.
                                selectbox_boolfilter.should(boolquery2)
                        else:
                            if term['inverted']:
                                selectbox_boolfilter.must_not(boolquery2)
                            else:
                                selectbox_boolfilter.must(boolquery2)

                if 'year_min_max' in temporal_filter[index] and len(temporal_filter[index]['year_min_max']) == 2:
                    # Expand the two years to 1 January .. 31 December.
                    start_date = date(temporal_filter[index]['year_min_max'][0], 1, 1).isoformat()
                    end_date = date(temporal_filter[index]['year_min_max'][1], 12, 31).isoformat()
                    date_range = Range(field='dates.value', gte=start_date, lte=end_date)
                    nested = Nested(path='dates', query=date_range)

                    if temporal_filter[index].get('inverted', False):
                        selectbox_boolfilter.must_not(nested)
                    else:
                        selectbox_boolfilter.must(nested)

                if 'filters' in temporal_filter[index]:
                    for temporal_filter_item in temporal_filter[index]['filters']:
                        date_type = ''  # collected but not used by the query
                        searchdate = ''
                        date_operator = ''
                        for node in temporal_filter_item['nodes']:
                            if node['entitytypeid'] == 'DATE_COMPARISON_OPERATOR.E55':
                                date_operator = node['value']
                            elif node['entitytypeid'] == 'date':
                                searchdate = node['value']
                            else:
                                date_type = node['value']

                        date_value = datetime.strptime(searchdate, '%Y-%m-%d').isoformat()

                        # NOTE(review): operator '0' is labelled "greater
                        # than" but issues an ``lt`` range, and '2' is
                        # labelled "less than" but issues ``gt`` -- confirm
                        # the operator codes. There is also no fallback for
                        # any other operator value, in which case
                        # ``date_range`` is stale or unbound.
                        if date_operator == '1':  # equals query
                            date_range = Range(field='dates.value', gte=date_value, lte=date_value)
                        elif date_operator == '0':  # greater than query
                            date_range = Range(field='dates.value', lt=date_value)
                        elif date_operator == '2':  # less than query
                            date_range = Range(field='dates.value', gt=date_value)

                        nested = Nested(path='dates', query=date_range)

                        if temporal_filter[index].get('inverted', False):
                            selectbox_boolfilter.must_not(nested)
                        else:
                            selectbox_boolfilter.must(nested)

            terms_queries.append(selectbox_boolfilter)

        # We now have individual query terms for each of the search components. Combine into one group now.
        # Start by building an array of groups which will be combined according to the global And/Or.
        # Queries within one of these groups will be combined by the complement of the global And/Or.
        # We may end up with [ [A,B], [C], [D,E] ], which would translate to either:
        #    (A || B) && C && (D || E)
        #       or
        #    (A && B) || C || (D && E)
        # for global AND or OR respectively.
        bool_components = []

        for i, term_query in enumerate(terms_queries):
            # Value comparison, not ``is``: identity of ints is an
            # implementation detail.
            if i == 0:
                bool_components.append([term_query])
            else:
                should_group_with_previous = filter_combine_flags[i-1]
                if should_group_with_previous:
                    bool_components[-1].append(term_query)
                else:
                    bool_components.append([term_query])

        # Now build the ES queries.
        for bool_component in bool_components:
            if len(bool_component) == 1:
                # Just combine this on its own.
                q = bool_component[0]
            else:
                q = Bool()
                for sub_component in bool_component:
                    if boolean_search == 'or':
                        # Apply the OPPOSITE of the global boolean operator.
                        q.must(sub_component)
                    else:
                        q.should(sub_component)

            # Combine into the overall query according to the global boolean operator.
            if boolean_search == 'or':
                boolfilter.should(q)
            else:
                boolfilter.must(q)

    if 'geometry' in spatial_filter and 'type' in spatial_filter['geometry'] and spatial_filter['geometry']['type'] != '':
        geojson = spatial_filter['geometry']
        if geojson['type'] == 'bbox':
            # Reorder the four bbox values into the two corner points of an
            # envelope: [[c0, c3], [c2, c1]].
            coordinates = [[geojson['coordinates'][0], geojson['coordinates'][3]], [geojson['coordinates'][2], geojson['coordinates'][1]]]
            geoshape = GeoShape(field='geometries.value', type='envelope', coordinates=coordinates)
        else:
            buffer_spec = spatial_filter['buffer']
            geojson = JSONDeserializer().deserialize(_buffer(geojson, buffer_spec['width'], buffer_spec['unit']).json)
            geoshape = GeoShape(field='geometries.value', type=geojson['type'], coordinates=geojson['coordinates'])
        nested = Nested(path='geometries', query=geoshape)

        if spatial_filter.get('inverted', False):
            boolfilter.must_not(nested)
        else:
            boolfilter.must(nested)

    if not boolquery.empty:
        query.add_query(boolquery)

    if not boolfilter.empty:
        query.add_filter(boolfilter)

    # Sorting criterion added to query (AZ 10/08/16).
    query.dsl.update({'sort': sorting})

    return query
Beispiel #18
0
def build_search_results_dsl(request):
    """Build the search ``Query`` described by the request's GET parameters.

    The following ``request.GET`` entries are read:

    * ``termFilter``     -- JSON list of term dicts (``type``, ``value``,
      ``inverted`` and, for ``type == 'term'``, ``context``); may be ``''``.
    * ``spatialFilter``  -- JSON object, optionally holding ``geometry``,
      ``buffer`` and ``inverted``.
    * ``temporalFilter`` -- JSON object, optionally holding ``year_min_max``,
      ``filters`` and ``inverted``.
    * ``page``           -- 1-based page number (missing or ``''`` means 1).
    * ``export``         -- when present, the export page size is used.

    Returns the assembled ``Query``; the search itself is not executed here.
    """
    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(request.GET.get('spatialFilter', None))
    export = request.GET.get('export', None)
    page_param = request.GET.get('page', 1)
    page = 1 if page_param == '' else int(page_param)
    temporal_filter = JSONDeserializer().deserialize(request.GET.get('temporalFilter', None))

    se = SearchEngineFactory().create()

    if export is not None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit * (page - 1), limit=limit)
    boolquery = Bool()
    boolfilter = Bool()

    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            if term['type'] == 'term':
                # Exact phrase match on a value of one specific entity type.
                entitytype = models.EntityTypes.objects.get(conceptid_id=term['context'])
                boolfilter_nested = Bool()
                boolfilter_nested.must(Terms(field='child_entities.entitytypeid', terms=[entitytype.pk]))
                boolfilter_nested.must(Match(field='child_entities.value', query=term['value'], type='phrase'))
                nested = Nested(path='child_entities', query=boolfilter_nested)
                target = boolfilter
            elif term['type'] == 'concept':
                # Match the concept itself or any id returned by _get_child_concepts.
                concept_ids = _get_child_concepts(term['value'])
                nested = Nested(path='domains', query=Terms(field='domains.conceptid', terms=concept_ids))
                target = boolfilter
            elif term['type'] == 'string':
                # Free text: prefix match on either the raw or the folded value.
                # Unlike the other term types this goes to the query, not the filter.
                boolfilter_folded = Bool()
                boolfilter_folded.should(Match(field='child_entities.value', query=term['value'], type='phrase_prefix'))
                boolfilter_folded.should(Match(field='child_entities.value.folded', query=term['value'], type='phrase_prefix'))
                nested = Nested(path='child_entities', query=boolfilter_folded)
                target = boolquery
            else:
                # Unknown term types contribute nothing.
                continue

            if term['inverted']:
                target.must_not(nested)
            else:
                target.must(nested)

    if 'geometry' in spatial_filter and 'type' in spatial_filter['geometry'] and spatial_filter['geometry']['type'] != '':
        geojson = spatial_filter['geometry']
        if geojson['type'] == 'bbox':
            # Reorder the four bbox values into the two corner points of an envelope.
            bbox = geojson['coordinates']
            coordinates = [[bbox[0], bbox[3]], [bbox[2], bbox[1]]]
            geoshape = GeoShape(field='geometries.value', type='envelope', coordinates=coordinates)
        else:
            # Any other geometry is buffered first and the buffered shape is searched.
            buffer_spec = spatial_filter['buffer']
            geojson = JSONDeserializer().deserialize(_buffer(geojson, buffer_spec['width'], buffer_spec['unit']).json)
            geoshape = GeoShape(field='geometries.value', type=geojson['type'], coordinates=geojson['coordinates'])
        nested = Nested(path='geometries', query=geoshape)

        # A missing 'inverted' key means "not inverted".
        if 'inverted' in spatial_filter and spatial_filter['inverted']:
            boolfilter.must_not(nested)
        else:
            boolfilter.must(nested)

    if 'year_min_max' in temporal_filter and len(temporal_filter['year_min_max']) == 2:
        # Year bounds are scaled by 10000 before being compared with the stored
        # values (presumably the same integer date encoding that date_to_int
        # produces -- TODO confirm).
        start = temporal_filter['year_min_max'][0] * 10000
        end = temporal_filter['year_min_max'][1] * 10000
        year_range = Range(field='extendeddates.value', gte=start, lte=end)
        nested = Nested(path='extendeddates', query=year_range)

        if 'inverted' in temporal_filter and temporal_filter['inverted']:
            boolfilter.must_not(nested)
        else:
            boolfilter.must(nested)

    if 'filters' in temporal_filter:
        for date_filter in temporal_filter['filters']:
            date_type = ''
            date_string = ''
            date_operator = ''
            for node in date_filter['nodes']:
                if node['entitytypeid'] == 'DATE_COMPARISON_OPERATOR.E55':
                    date_operator = node['value']
                elif node['entitytypeid'] == 'date':
                    date_string = node['value']
                else:
                    date_type = node['value']

            boolfilter.must(Terms(field='extendeddategroups.conceptid', terms=date_type))

            date_value = date_to_int(date_string)

            # NOTE(review): the original comments labelled operator '0' as
            # "greater than" and '2' as "less than", yet the clauses use lt and
            # gt respectively. The mapping is kept exactly as it was -- confirm
            # against the DATE_COMPARISON_OPERATOR.E55 values.
            if date_operator == '1':  # equals
                date_range = Range(field='extendeddategroups.value', gte=date_value, lte=date_value)
            elif date_operator == '0':
                date_range = Range(field='extendeddategroups.value', lt=date_value)
            elif date_operator == '2':
                date_range = Range(field='extendeddategroups.value', gt=date_value)
            else:
                # No usable operator: previously this either raised
                # UnboundLocalError or silently reused a Range left over from
                # the year filter / a previous iteration. Skip the comparison.
                continue

            if 'inverted' in date_filter and date_filter['inverted']:
                boolfilter.must_not(date_range)
            else:
                boolfilter.must(date_range)

    if not boolquery.empty:
        query.add_query(boolquery)

    if not boolfilter.empty:
        query.add_filter(boolfilter)

    return query
Beispiel #19
0
def build_base_search_results_dsl(request):
    """Build the base search ``Query`` from the request's GET parameters.

    The following ``request.GET`` entries are read:

    * ``termFilter``     -- JSON list of term dicts (``type``, ``value``,
      ``inverted`` and, for ``type == 'term'``, ``context``); may be ``''``.
    * ``spatialFilter``  -- JSON object, optionally holding ``geometry``,
      ``buffer`` and ``inverted``.
    * ``temporalFilter`` -- JSON object, optionally holding ``year_min_max``
      and ``inverted``.
    * ``page``           -- 1-based page number (missing or ``''`` means 1).
    * ``export``         -- when present, the export page size is used.

    When the requesting user's username is ``'anonymous'`` and a term filter
    was supplied, the filter returned by ``get_auto_filter(request)`` is
    appended to the supplied terms.

    Returns the assembled ``Query``; the search itself is not executed here.
    """
    # Function-scope import: the debug output below used to be bare ``print``
    # statements; it now goes through the logging framework instead of stdout.
    import logging
    logger = logging.getLogger(__name__)

    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(request.GET.get('spatialFilter', None))
    export = request.GET.get('export', None)
    page_param = request.GET.get('page', 1)
    page = 1 if page_param == '' else int(page_param)
    temporal_filter = JSONDeserializer().deserialize(request.GET.get('temporalFilter', None))

    se = SearchEngineFactory().create()

    if export is not None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit * (page - 1), limit=limit)
    boolquery = Bool()
    boolfilter = Bool()

    # If the user is not authenticated, show only valid resources (this
    # probably still needs work -- maybe they should see only their own?).
    # NOTE(review): the automatic filter is only added when a term filter was
    # supplied; an anonymous request with an empty termFilter is not
    # restricted here -- confirm whether that is intended.
    if term_filter != '' and request.user.username == 'anonymous':
        restricted_terms = list(JSONDeserializer().deserialize(term_filter))
        # Look up the concept id and context for the Published status.
        restricted_terms.append(get_auto_filter(request))
        # Re-serialized so the loop below handles both cases the same way.
        term_filter = JSONSerializer().serialize(restricted_terms)

    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            logger.debug('term_filter term: %s', term)
            if term['type'] == 'term':
                # Exact phrase match on a value of one specific entity type.
                entitytype = models.EntityTypes.objects.get(conceptid_id=term['context'])
                boolfilter_nested = Bool()
                boolfilter_nested.must(Terms(field='child_entities.entitytypeid', terms=[entitytype.pk]))
                boolfilter_nested.must(Match(field='child_entities.value', query=term['value'], type='phrase'))
                nested = Nested(path='child_entities', query=boolfilter_nested)
                target = boolfilter
            elif term['type'] == 'concept':
                # Match the concept itself or any id returned by _get_child_concepts.
                concept_ids = _get_child_concepts(term['value'])
                nested = Nested(path='domains', query=Terms(field='domains.conceptid', terms=concept_ids))
                target = boolfilter
            elif term['type'] == 'string':
                # Free text: prefix match on either the raw or the folded value.
                # Unlike the other term types this goes to the query, not the filter.
                boolfilter_folded = Bool()
                boolfilter_folded.should(Match(field='child_entities.value', query=term['value'], type='phrase_prefix'))
                boolfilter_folded.should(Match(field='child_entities.value.folded', query=term['value'], type='phrase_prefix'))
                nested = Nested(path='child_entities', query=boolfilter_folded)
                target = boolquery
            else:
                # Unknown term types contribute nothing.
                continue

            if term['inverted']:
                target.must_not(nested)
            else:
                target.must(nested)

    if 'geometry' in spatial_filter and 'type' in spatial_filter['geometry'] and spatial_filter['geometry']['type'] != '':
        geojson = spatial_filter['geometry']
        if geojson['type'] == 'bbox':
            # Reorder the four bbox values into the two corner points of an envelope.
            bbox = geojson['coordinates']
            coordinates = [[bbox[0], bbox[3]], [bbox[2], bbox[1]]]
            geoshape = GeoShape(field='geometries.value', type='envelope', coordinates=coordinates)
        else:
            # Any other geometry is buffered first and the buffered shape is searched.
            buffer_spec = spatial_filter['buffer']
            geojson = JSONDeserializer().deserialize(_buffer(geojson, buffer_spec['width'], buffer_spec['unit']).json)
            geoshape = GeoShape(field='geometries.value', type=geojson['type'], coordinates=geojson['coordinates'])
        nested = Nested(path='geometries', query=geoshape)

        # A missing 'inverted' key means "not inverted".
        if 'inverted' in spatial_filter and spatial_filter['inverted']:
            boolfilter.must_not(nested)
        else:
            boolfilter.must(nested)

    if 'year_min_max' in temporal_filter and len(temporal_filter['year_min_max']) == 2:
        # The year pair is widened to 1 January .. 31 December and compared as
        # ISO date strings. The former ``if start_date:`` / ``if end_date:``
        # guards were dropped as redundant (assumes ``date`` is
        # ``datetime.date``, whose instances are always truthy -- TODO confirm
        # against the file's imports).
        start_date = date(temporal_filter['year_min_max'][0], 1, 1).isoformat()
        end_date = date(temporal_filter['year_min_max'][1], 12, 31).isoformat()
        year_range = Range(field='dates.value', gte=start_date, lte=end_date)
        nested = Nested(path='dates', query=year_range)

        if 'inverted' in temporal_filter and temporal_filter['inverted']:
            boolfilter.must_not(nested)
        else:
            boolfilter.must(nested)

    if not boolquery.empty:
        query.add_query(boolquery)

    if not boolfilter.empty:
        query.add_filter(boolfilter)

    return query