def get_related_resources(self, lang='en-US', limit=1000, start=0):
    """
    Returns an object that lists the related resources, the relationship types,
    and a reference to the current resource

    Keyword Arguments:
    lang -- language code handed to get_preflabel_from_valueid when labelling each relationship type
    limit -- page size handed to the relations Query
    start -- offset handed to the relations Query

    Returns a dict with the keys 'resource_instance', 'resource_relationships',
    'related_resources' and 'total' (the total reported by the relations search)

    """

    ret = {
        'resource_instance': self,
        'resource_relationships': [],
        'related_resources': []
    }
    se = SearchEngineFactory().create()

    # match relations that reference this resource on either side
    query = Query(se, limit=limit, start=start)
    bool_filter = Bool()
    bool_filter.should(Terms(field='resourceinstanceidfrom', terms=self.resourceinstanceid))
    bool_filter.should(Terms(field='resourceinstanceidto', terms=self.resourceinstanceid))
    query.add_query(bool_filter)
    resource_relations = query.search(index='resource_relations', doc_type='all')
    ret['total'] = resource_relations['hits']['total']

    instanceids = set()
    for relation in resource_relations['hits']['hits']:
        source = relation['_source']
        source['preflabel'] = get_preflabel_from_valueid(source['relationshiptype'], lang)
        ret['resource_relationships'].append(source)
        instanceids.add(source['resourceinstanceidto'])
        instanceids.add(source['resourceinstanceidfrom'])

    # a resource is not listed as related to itself; discard() (unlike remove())
    # does not raise when the id is not among the collected ids
    instanceids.discard(str(self.resourceinstanceid))

    # only ask the index for documents when there is at least one id to fetch
    if instanceids:
        related_resources = se.search(index='resource', doc_type='_all', id=list(instanceids))
        if related_resources:
            for resource in related_resources['docs']:
                ret['related_resources'].append(resource['_source'])

    return ret
def get_relations(resourceinstanceid, start, limit):
    """
    Searches the 'resource_relations' index for relations that reference the
    given resource instance on either side and returns the raw search result

    Arguments:
    resourceinstanceid -- id matched against 'resourceinstanceidfrom' and 'resourceinstanceidto'
    start -- offset handed to the Query
    limit -- page size handed to the Query

    NOTE: uses a search engine named ``se`` that is not defined in this function

    """

    either_side = Bool()
    for id_field in ('resourceinstanceidfrom', 'resourceinstanceidto'):
        either_side.should(Terms(field=id_field, terms=resourceinstanceid))

    relation_query = Query(se, start=start, limit=limit)
    relation_query.add_query(either_side)
    return relation_query.search(index='resource_relations', doc_type='all')
def get_preflabel_from_conceptid(conceptid, lang):
    """
    Searches the indexed prefLabels of a concept and returns the one that best
    fits the requested language

    Arguments:
    conceptid -- id of the concept whose labels are searched
    lang -- requested language code (eg: 'en-US')

    Selection, evaluated per label in search-result order:
    - a label whose language equals lang is returned immediately
    - otherwise a label whose part before the first '-' equals that of lang
      becomes the candidate (a later such label replaces an earlier one)
    - otherwise a label in settings.LANGUAGE_CODE becomes the candidate, but
      only while no candidate has been chosen yet
    If no candidate was chosen the last label seen is returned, or a
    placeholder dict with empty values when the search returned no labels

    """

    ret = None
    default = {
        "category": "",
        "conceptid": "",
        "language": "",
        "value": "",
        "type": "",
        "id": ""
    }
    se = SearchEngineFactory().create()
    query = Query(se)
    bool_query = Bool()
    bool_query.must(Match(field='type', query='prefLabel', type='phrase'))
    bool_query.filter(Terms(field='conceptid', terms=[conceptid]))
    query.add_query(bool_query)
    preflabels = query.search(index='strings', doc_type='concept')['hits']['hits']
    for preflabel in preflabels:
        label = preflabel['_source']
        default = label
        # get the label in the preferred language, otherwise get the label in the default language
        if label['language'] == lang:
            return label
        if label['language'].split('-')[0] == lang.split('-')[0]:
            ret = label
        if label['language'] == settings.LANGUAGE_CODE and ret is None:
            ret = label
    return default if ret is None else ret
def delete_index(self):
    """
    Removes the index entries that belong to this object: deletes every
    'concept_labels' document whose 'conceptid' phrase-matches self.conceptid,
    then calls the search engine's delete_terms with self.id

    """

    engine = SearchEngineFactory().create()

    label_query = Query(engine, start=0, limit=10000)
    label_query.add_query(Match(field='conceptid', query=self.conceptid, type='phrase'))
    label_query.delete(index='concept_labels')

    engine.delete_terms(self.id)
def index_resources(clear_index=True, index_name=None, batch_size=settings.BULK_IMPORT_BATCH_SIZE):
    """
    Indexes all resources from the database

    Keyword Arguments:
    clear_index -- set to True to remove all the resources from the index before the reindexing operation
    index_name -- only applies to custom indexes and if given will try and just refresh the data in that index
    batch_size -- the number of records to index as a group, the larger the number the more memory required

    """

    engine = SearchEngineFactory().create()

    # the "terms" index is only wiped when no specific custom index was requested
    wipe_terms = clear_index and index_name is None
    if wipe_terms:
        Query(se=engine).delete(index="terms")

    # every resource model except the system settings model
    resource_graphs = models.GraphModel.objects.filter(isresource=True)
    resource_graphs = resource_graphs.exclude(graphid=settings.SYSTEM_SETTINGS_RESOURCE_MODEL_ID)
    resource_types = resource_graphs.values_list("graphid", flat=True)

    index_resources_by_type(resource_types, clear_index=clear_index, index_name=index_name, batch_size=batch_size)
def get_relations(resourceinstanceid, start, limit, resourceinstance_graphid=None):
    """
    Searches RESOURCE_RELATIONS_INDEX for relations that reference the given
    resource instance on either side and returns the raw search result

    Arguments:
    resourceinstanceid -- id matched against 'resourceinstanceidfrom' and 'resourceinstanceidto'
    start -- offset handed to the Query
    limit -- page size handed to the Query

    Keyword Arguments:
    resourceinstance_graphid -- when truthy, a relation must additionally match this value in
                                'resourceinstancefrom_graphid' or 'resourceinstanceto_graphid'

    NOTE: uses a search engine named ``se`` that is not defined in this function

    """

    relation_filter = Bool()
    for id_field in ("resourceinstanceidfrom", "resourceinstanceidto"):
        relation_filter.should(Terms(field=id_field, terms=resourceinstanceid))

    if resourceinstance_graphid:
        graph_filter = Bool()
        for graph_field in ("resourceinstancefrom_graphid", "resourceinstanceto_graphid"):
            graph_filter.should(Terms(field=graph_field, terms=resourceinstance_graphid))
        relation_filter.must(graph_filter)

    relation_query = Query(se, start=start, limit=limit)
    relation_query.add_query(relation_filter)
    return relation_query.search(index=RESOURCE_RELATIONS_INDEX)
def delete_concept_values_index(concepts_to_delete):
    """
    Deletes from the 'strings' index the 'concept' documents of every concept
    in concepts_to_delete

    Arguments:
    concepts_to_delete -- mapping whose values are concept objects exposing an ``id`` attribute
                          (the keys are not used)

    """

    se = SearchEngineFactory().create()
    # values() instead of the Python-2-only itervalues(), so this works on Python 2 and 3
    for concept in concepts_to_delete.values():
        query = Query(se, start=0, limit=10000)
        query.add_query(Term(field='conceptid', term=concept.id))
        query.delete(index='strings', doc_type='concept')
def get_restricted_instances(user, search_engine=None, allresources=False):
    """
    Returns a list of ids of restricted resource instances

    Arguments:
    user -- the user to check; ``is_superuser`` and ``id`` are read from it

    Keyword Arguments:
    search_engine -- search engine handed to Query; only used when allresources is not True
    allresources -- when True, returns the object ids of every group or user object permission
                    with the codename 'no_access_to_resourceinstance', regardless of user;
                    otherwise returns the ids of the indexed resources that list the user's id
                    under 'permissions.users_with_no_access'

    A superuser gets an empty list when allresources is False

    """

    if allresources is False and user.is_superuser is True:
        return []

    if allresources is True:
        restricted_group_instances = {
            perm["object_pk"]
            for perm in GroupObjectPermission.objects.filter(permission__codename="no_access_to_resourceinstance").values("object_pk")
        }
        restricted_user_instances = {
            perm["object_pk"]
            for perm in UserObjectPermission.objects.filter(permission__codename="no_access_to_resourceinstance").values("object_pk")
        }
        return list(restricted_group_instances | restricted_user_instances)

    terms = Terms(field="permissions.users_with_no_access", terms=[str(user.id)])
    query = Query(search_engine, start=0, limit=settings.SEARCH_RESULT_LIMIT)
    has_access = Bool()
    nested_term_filter = Nested(path="permissions", query=terms)
    has_access.must(nested_term_filter)
    query.add_query(has_access)
    results = query.search(index=RESOURCES_INDEX, scroll="1m")
    scroll_id = results["_scroll_id"]
    total = results["hits"]["total"]["value"]

    # the first page holds at most SEARCH_RESULT_LIMIT hits, scroll through the rest
    if total > settings.SEARCH_RESULT_LIMIT:
        pages = total // settings.SEARCH_RESULT_LIMIT
        for _page in range(pages):
            results_scrolled = query.se.es.scroll(scroll_id=scroll_id, scroll="1m")
            # always continue from the scroll id of the latest response; Elasticsearch
            # does not guarantee that the id stays the same between requests
            scroll_id = results_scrolled.get("_scroll_id", scroll_id)
            results["hits"]["hits"] += results_scrolled["hits"]["hits"]

    return [res["_id"] for res in results["hits"]["hits"]]
def delete(self, *args, **kwargs):
    """
    Deletes this tile (after deleting the tiles in self.tiles), or records a
    provisional delete when the requesting user may not delete it outright

    Keyword Arguments:
    request -- (popped from kwargs) the request that triggered the delete; its ``user`` decides
               whether the delete is applied or only recorded as a provisional edit.  Without a
               request (or a request without a user) the delete is applied
    provisional_edit_log_details -- (popped from kwargs) passed through to save_edit

    Remaining positional and keyword arguments are forwarded to the child tile
    deletes and to the model's delete/save

    """

    se = SearchEngineFactory().create()
    request = kwargs.pop("request", None)
    provisional_edit_log_details = kwargs.pop("provisional_edit_log_details", None)
    for tile in self.tiles:
        tile.delete(*args, request=request, **kwargs)
    try:
        user = request.user
        user_is_reviewer = user_is_resource_reviewer(user)
    except AttributeError:  # no user
        user = None
        user_is_reviewer = True

    if user_is_reviewer is True or self.user_owns_provisional(user):
        # remove the terms that were indexed for this tile
        query = Query(se)
        bool_query = Bool()
        bool_query.filter(Terms(field="tileid", terms=[self.tileid]))
        query.add_query(bool_query)
        results = query.search(index="terms")["hits"]["hits"]
        for result in results:
            se.delete(index="terms", id=result["_id"])

        self.__preDelete(request)
        # request can be None on the "no user" path above, so request.user must not be
        # dereferenced unconditionally here
        self.save_edit(
            user=getattr(request, "user", None),
            edit_type="tile delete",
            old_value=self.data,
            provisional_edit_log_details=provisional_edit_log_details,
        )
        super(Tile, self).delete(*args, **kwargs)
        resource = Resource.objects.get(resourceinstanceid=self.resourceinstance.resourceinstanceid)
        resource.index()
    else:
        self.apply_provisional_edit(user, data={}, action="delete")
        super(Tile, self).save(*args, **kwargs)
def test_bulk_delete(self):
    """
    Test bulk deleting of documents in Elasticsearch

    Indexes ten 'prefLabel' and ten 'altLabel' documents, bulk deletes the
    'altLabel' ones and expects ten documents to remain

    """

    engine = SearchEngineFactory().create()
    # se.create_index(index='test')

    for i in range(10):
        pref_doc = {
            'id': i,
            'type': 'prefLabel',
            'value': 'test pref label',
        }
        alt_doc = {
            'id': i + 100,
            'type': 'altLabel',
            'value': 'test alt label',
        }
        for doc in (pref_doc, alt_doc):
            engine.index_data(index='test', doc_type='test', body=doc, idfield='id', refresh=True)

    alt_label_query = Query(engine, start=0, limit=100)
    alt_label_query.add_query(Match(field='type', query='altLabel'))
    alt_label_query.delete(index='test', refresh=True)

    remaining = engine.es.count(index='test', doc_type='test')['count']
    self.assertEqual(remaining, 10)
def get_preflabel_from_conceptid(conceptid, lang):
    """
    Searches the labels indexed for a concept and returns the one that best
    fits the requested language

    Arguments:
    conceptid -- id of the concept whose labels are searched
    lang -- requested language code (eg: 'en-US')

    Selection, evaluated per label in search-result order:
    - a label whose language equals lang is returned immediately
    - otherwise a label whose part before the first '-' equals that of lang
      becomes the candidate (a later such label replaces an earlier one)
    - otherwise a label in settings.LANGUAGE_CODE becomes the candidate, but
      only while no candidate has been chosen yet
    If no candidate was chosen the last label seen is returned, or a
    placeholder dict with empty values when the search returned no labels

    """

    ret = None
    default = {
        "category": "",
        "conceptid": "",
        "language": "",
        "value": "",
        "type": "",
        "id": ""
    }
    se = SearchEngineFactory().create()
    query = Query(se)
    terms = Terms(field='conceptid', terms=[conceptid])
    # NOTE: the search is deliberately not restricted to labels of type 'prefLabel'.
    # The Arabic labels are currently indexed as altLabels; only after ElasticSearch
    # has been reindexed should the following restriction be enabled:
    #   match = Match(field='type', query='prefLabel', type='phrase')
    #   query.add_query(match)
    query.add_filter(terms)
    preflabels = query.search(index='concept_labels')['hits']['hits']
    for preflabel in preflabels:
        label = preflabel['_source']
        default = label
        # get the label in the preferred language, otherwise get the label in the default language
        if label['language'] == lang:
            return label
        if label['language'].split('-')[0] == lang.split('-')[0]:
            ret = label
        # this fallback used to compare against lang again, which can never be true
        # at this point (an exact match has already returned above)
        if label['language'] == settings.LANGUAGE_CODE and ret is None:
            ret = label
    return default if ret is None else ret
def get_auto_filter(request):
    """
    Builds the concept term filter for settings.PUBLISHED_LABEL

    Searches the 'term' index for settings.PUBLISHED_LABEL, labels every hit
    with the preferred label of its context (in the language given by the
    'lang' request parameter, defaulting to settings.LANGUAGE_CODE) and takes
    the concept id and context from the hit whose term is exactly
    settings.PUBLISHED_LABEL under a context labelled settings.EW_STATUS_TERM.
    When several hits qualify the last one wins; when none does, the filter's
    'value' and 'context' are empty strings

    Arguments:
    request -- the http request

    """

    lang = request.GET.get('lang', settings.LANGUAGE_CODE)
    engine = SearchEngineFactory().create()
    needle = settings.PUBLISHED_LABEL

    term_query = Query(engine, start=0, limit=settings.SEARCH_DROPDOWN_LENGTH)
    candidates = Bool()
    candidates.should(Match(field='term', query=needle.lower(), type='phrase_prefix', fuzziness='AUTO'))
    candidates.should(Match(field='term.folded', query=needle.lower(), type='phrase_prefix', fuzziness='AUTO'))
    candidates.should(Match(field='term.folded', query=needle.lower(), fuzziness='AUTO'))
    term_query.add_query(candidates)
    found = term_query.search(index='term', doc_type='value')

    conceptid = ''
    context = ''
    for hit in found['hits']['hits']:
        source = hit['_source']
        pref_label = get_preflabel_from_conceptid(source['context'], lang)
        source['options']['context_label'] = pref_label['value']
        if pref_label['value'] != settings.EW_STATUS_TERM:
            continue
        if source['term'] == settings.PUBLISHED_LABEL:
            conceptid = source['options']['conceptid']
            context = source['context']

    return {
        "inverted": False,
        "type": "concept",
        "text": settings.PUBLISHED_LABEL,
        "value": conceptid,
        "context": context,
        "context_label": settings.EW_STATUS_TERM,
        "id": settings.PUBLISHED_LABEL + conceptid,
    }
def get_preflabel_from_conceptid(conceptid, lang):
    """
    Searches the labels indexed for a concept and returns the one that best
    fits the requested language

    Arguments:
    conceptid -- id of the concept whose labels are searched
    lang -- requested language code (eg: 'en-US')

    Selection, evaluated per label in search-result order:
    - a label whose language equals lang is returned immediately
    - otherwise a label whose part before the first '-' equals that of lang
      becomes the candidate (a later such label replaces an earlier one)
    - otherwise a label in settings.LANGUAGE_CODE becomes the candidate, but
      only while no candidate has been chosen yet
    If no candidate was chosen the last label seen is returned, or a
    placeholder dict with empty values when the search returned no labels

    """

    ret = None
    default = {
        "category": "",
        "conceptid": "",
        "language": "",
        "value": "",
        "type": "",
        "id": ""
    }
    se = SearchEngineFactory().create()
    query = Query(se)
    terms = Terms(field='conceptid', terms=[conceptid])
    # NOTE: the search is deliberately not restricted to labels of type 'prefLabel'.
    # The Arabic labels are currently indexed as altLabels; only after ElasticSearch
    # has been reindexed should the following restriction be enabled:
    #   match = Match(field='type', query='prefLabel', type='phrase')
    #   query.add_query(match)
    query.add_filter(terms)
    preflabels = query.search(index='concept_labels')['hits']['hits']
    for preflabel in preflabels:
        label = preflabel['_source']
        default = label
        # get the label in the preferred language, otherwise get the label in the default language
        if label['language'] == lang:
            return label
        if label['language'].split('-')[0] == lang.split('-')[0]:
            ret = label
        # this fallback used to compare against lang again, which can never be true
        # at this point (an exact match has already returned above)
        if label['language'] == settings.LANGUAGE_CODE and ret is None:
            ret = label
    return default if ret is None else ret
def delete(self, user={}, note=''):
    """
    Deletes a single resource and any related indexed data

    In order: the resource-to-resource relation records returned by
    get_related_resources, the 'term' documents in the 'strings' index that
    carry this resourceinstanceid, the resource's own document in the
    'resource' index, and finally (after saving a 'delete' edit) the database
    record itself

    Keyword Arguments:
    user -- not used by this method
    note -- not used by this method

    """

    engine = SearchEngineFactory().create()

    relations = self.get_related_resources(lang="en-US", start=0, limit=1000)
    for relationship in relations['resource_relationships']:
        relation_record = models.ResourceXResource.objects.get(pk=relationship['resourcexid'])
        relation_record.delete()

    own_terms = Bool()
    own_terms.filter(Terms(field='resourceinstanceid', terms=[self.resourceinstanceid]))
    term_query = Query(engine)
    term_query.add_query(own_terms)
    term_hits = term_query.search(index='strings', doc_type='term')['hits']['hits']
    for hit in term_hits:
        engine.delete(index='strings', doc_type='term', id=hit['_id'])

    engine.delete(index='resource', doc_type=str(self.graph_id), id=self.resourceinstanceid)

    self.save_edit(edit_type='delete')
    super(Resource, self).delete()
def get_related_resources(resourceid, lang, limit=1000, start=0):
    """
    Returns the relationships of a resource together with the documents of the
    resources on the other side of those relationships

    Arguments:
    resourceid -- id matched against 'entityid1' and 'entityid2' of the indexed relations
    lang -- language code handed to get_preflabel_from_valueid when labelling each relationship type

    Keyword Arguments:
    limit -- page size handed to the relations Query
    start -- offset handed to the relations Query

    Returns a dict with the keys 'resource_relationships', 'related_resources'
    and 'total' (the total reported by the relations search)

    """

    ret = {
        'resource_relationships': [],
        'related_resources': []
    }
    se = SearchEngineFactory().create()

    query = Query(se, limit=limit, start=start)
    query.add_filter(Terms(field='entityid1', terms=resourceid).dsl, operator='or')
    query.add_filter(Terms(field='entityid2', terms=resourceid).dsl, operator='or')
    resource_relations = query.search(index='resource_relations', doc_type='all')
    ret['total'] = resource_relations['hits']['total']

    entityids = set()
    for relation in resource_relations['hits']['hits']:
        source = relation['_source']
        source['preflabel'] = get_preflabel_from_valueid(source['relationshiptype'], lang)
        ret['resource_relationships'].append(source)
        entityids.add(source['entityid1'])
        entityids.add(source['entityid2'])

    # a resource is not listed as related to itself; discard() (unlike remove())
    # does not raise when the id is not among the collected ids
    entityids.discard(resourceid)

    # only ask the index for documents when there is at least one id to fetch
    if entityids:
        related_resources = se.search(index='entity', doc_type='_all', id=list(entityids))
        if related_resources:
            for resource in related_resources['docs']:
                ret['related_resources'].append(resource['_source'])

    return ret
def arch_investigation_layer(request, boundtype=''):
    """
    Returns a GeoJSON FeatureCollection with one feature for every
    'SHOVEL_TEST_GEOMETRY.E47' geometry found on the indexed
    'ARCHAEOLOGICAL_ZONE.E53' entities

    Arguments:
    request -- the http request; the optional 'limit' parameter is handed to the Query
               (defaults to settings.MAP_LAYER_FEATURE_LIMIT)

    Keyword Arguments:
    boundtype -- not used by this view

    """

    limit = request.GET.get('limit', settings.MAP_LAYER_FEATURE_LIMIT)
    geojson_collection = {
        "type": "FeatureCollection",
        "features": []
    }

    se = SearchEngineFactory().create()
    query = Query(se, limit=limit)
    data = query.search(index='entity', doc_type='ARCHAEOLOGICAL_ZONE.E53')

    for item in data['hits']['hits']:
        for geom in item['_source']['geometries']:
            if geom['entitytypeid'] != 'SHOVEL_TEST_GEOMETRY.E47':
                continue
            # every feature carries the id of the entity it belongs to
            geojson_collection['features'].append({
                'geometry': geom['value'],
                'type': "Feature",
                'id': item['_source']['entityid'],
            })

    return JSONResponse(geojson_collection)
def index_resources(clear_index=True, batch_size=settings.BULK_IMPORT_BATCH_SIZE, quiet=False):
    """
    Indexes all resources from the database

    Keyword Arguments:
    clear_index -- set to True to remove all the resources from the index before the reindexing operation
    batch_size -- the number of records to index as a group, the larger the number the more memory required
    quiet -- Silences the status bar output during certain operations, use in celery operations for example

    NOTE: uses a search engine named ``se`` that is not defined in this function

    """

    if clear_index:
        Query(se=se).delete(index=TERMS_INDEX)

    # every resource model except the system settings model
    resource_graphs = models.GraphModel.objects.filter(isresource=True)
    resource_graphs = resource_graphs.exclude(graphid=settings.SYSTEM_SETTINGS_RESOURCE_MODEL_ID)
    resource_types = resource_graphs.values_list("graphid", flat=True)

    index_resources_by_type(resource_types, clear_index=clear_index, batch_size=batch_size, quiet=quiet)
def get_relations(resourceinstanceid, start, limit):
    """
    Searches the "resource_relations" index for relations that reference the
    given resource instance on either side and returns the raw search result

    Arguments:
    resourceinstanceid -- id matched against "resourceinstanceidfrom" and "resourceinstanceidto"
    start -- offset handed to the Query
    limit -- page size handed to the Query

    NOTE: uses a search engine named ``se`` that is not defined in this function

    """

    outgoing = Terms(field="resourceinstanceidfrom", terms=resourceinstanceid)
    incoming = Terms(field="resourceinstanceidto", terms=resourceinstanceid)

    either_end = Bool()
    either_end.should(outgoing)
    either_end.should(incoming)

    relation_search = Query(se, start=start, limit=limit)
    relation_search.add_query(either_end)
    return relation_search.search(index="resource_relations")
def get_preflabel_from_conceptid(conceptid, lang):
    """
    Searches CONCEPTS_INDEX for the prefLabels of a concept and returns the
    one that best fits the requested language

    Arguments:
    conceptid -- id of the concept whose labels are searched
    lang -- requested language code (eg: 'en-US')

    An exact language match is returned as soon as it is seen.  Failing that,
    the last label sharing the part of lang before the first '-' is used, or a
    label in settings.LANGUAGE_CODE if no candidate had been chosen when it
    was seen.  With no candidate at all the last label seen is returned, or a
    placeholder dict with empty values when the search returned no labels

    NOTE: uses a search engine named ``se`` that is not defined in this function

    """

    placeholder = {
        "category": "",
        "conceptid": "",
        "language": "",
        "value": "",
        "type": "",
        "id": "",
    }

    label_filter = Bool()
    label_filter.must(Match(field="type", query="prefLabel", type="phrase"))
    label_filter.filter(Terms(field="conceptid", terms=[conceptid]))
    label_query = Query(se)
    label_query.add_query(label_filter)
    hits = label_query.search(index=CONCEPTS_INDEX)["hits"]["hits"]

    chosen = None
    last_seen = placeholder
    for hit in hits:
        label = hit["_source"]
        last_seen = label
        language = label["language"]
        # get the label in the preferred language, otherwise get the label in the default language
        if language == lang:
            return label
        if language.split("-")[0] == lang.split("-")[0]:
            chosen = label
        elif chosen is None and language == settings.LANGUAGE_CODE:
            chosen = label

    if chosen is not None:
        return chosen
    return last_seen
def delete(self, *args, **kwargs):
    """
    Deletes this tile (after deleting the tiles in self.tiles), or records a
    provisional delete when the requesting user may not delete it outright

    Keyword Arguments:
    request -- (popped from kwargs) the request that triggered the delete; its ``user`` decides
               whether the delete is applied or only recorded as a provisional edit.  Without a
               request (or a request without a user) the delete is applied
    provisional_edit_log_details -- (popped from kwargs) passed through to save_edit

    Remaining positional and keyword arguments are forwarded to the child tile
    deletes and to the model's delete/save

    """

    se = SearchEngineFactory().create()
    request = kwargs.pop('request', None)
    provisional_edit_log_details = kwargs.pop('provisional_edit_log_details', None)
    for tile in self.tiles:
        tile.delete(*args, request=request, **kwargs)
    try:
        user = request.user
        user_is_reviewer = request.user.groups.filter(name='Resource Reviewer').exists()
    except AttributeError:  # no user
        user = None
        # without a user there is nobody to hold a provisional edit for, so the delete
        # is applied; leaving this name unset made the check below raise
        user_is_reviewer = True

    if user_is_reviewer is True or self.user_owns_provisional(user):
        # remove the terms that were indexed for this tile
        query = Query(se)
        bool_query = Bool()
        bool_query.filter(Terms(field='tileid', terms=[self.tileid]))
        query.add_query(bool_query)
        results = query.search(index='terms')['hits']['hits']
        for result in results:
            se.delete(index='terms', id=result['_id'])

        self.__preDelete(request)
        # request can be None on the "no user" path above, so request.user must not be
        # dereferenced unconditionally here
        self.save_edit(
            user=getattr(request, 'user', None),
            edit_type='tile delete',
            old_value=self.data,
            provisional_edit_log_details=provisional_edit_log_details)
        super(Tile, self).delete(*args, **kwargs)
        resource = Resource.objects.get(resourceinstanceid=self.resourceinstance.resourceinstanceid)
        resource.index()
    else:
        self.apply_provisional_edit(user, data={}, action='delete')
        super(Tile, self).save(*args, **kwargs)
def find_overlapping(request):
    """
    Queries ES when called via Ajax when a new geometry is created in the
    Location tab, so that the caller can raise an alert if pre-existing
    resources are found within the perimeter of the polygon (or the buffered
    zone around a point/line/polygon)

    Arguments:
    request -- the http request; the 'geom' parameter holds the new geometry (read with srid 4326)

    Returns a JSON list with the 'id', 'type' and 'primaryname' of every
    indexed entity whose geometries matched the buffered geometry

    """

    geomString = request.GET.get('geom', '')
    geom = GEOSGeometry(geomString, srid=4326)

    # if settings.METER_RADIUS isn't set (or is falsy), default to 1Km
    mindistance = getattr(settings, 'METER_RADIUS', None) or 1000

    # the buffer is applied while the geometry is in srid 3857, the result is
    # transformed back to 4326 before it is used in the search
    geom.transform(3857)
    buffered_geom = geom.buffer(mindistance)
    buffered_geom.transform(4326)

    se = SearchEngineFactory().create()
    query = Query(se)
    boolfilter = Bool()
    geoshape = GeoShape(field='geometries.value', type=buffered_geom.geom_type, coordinates=buffered_geom.coords)
    nested = Nested(path='geometries', query=geoshape)
    boolfilter.must(nested)
    query.add_filter(boolfilter)
    results = query.search(index='entity', doc_type='')

    overlaps = [
        {
            'id': hit['_id'],
            'type': hit['_type'],
            'primaryname': hit['_source']['primaryname']
        }
        for hit in results['hits']['hits']
    ]
    return JSONResponse(overlaps)
def reverse_func(apps, schema_editor):
    """
    Reverse operation of a migration: reloads the base ontology with its
    extensions, reduces the ontology class/property of every node and edge to
    the part after the last '/', removes the index entries of the base Arches
    concept and deletes the 'identifier' value type

    Arguments:
    apps -- app registry used to look up the historical models
    schema_editor -- not used by this function

    """

    extensions = [os.path.join(settings.ONTOLOGY_PATH, x) for x in settings.ONTOLOGY_EXT]
    management.call_command(
        'load_ontology',
        source=os.path.join(settings.ONTOLOGY_PATH, settings.ONTOLOGY_BASE),
        version=settings.ONTOLOGY_BASE_VERSION,
        ontology_name=settings.ONTOLOGY_BASE_NAME,
        id=settings.ONTOLOGY_BASE_ID,
        extensions=','.join(extensions),
        verbosity=0)

    Node = apps.get_model("models", "Node")
    Edge = apps.get_model("models", "Edge")

    for node in Node.objects.all():
        node.ontologyclass = str(node.ontologyclass).split('/')[-1]
        node.save()

    for edge in Edge.objects.all():
        edge.ontologyproperty = str(edge.ontologyproperty).split('/')[-1]
        edge.save()

    # remove index for base Arches concept
    se = SearchEngineFactory().create()
    query = Query(se, start=0, limit=10000)
    query.add_query(Term(field='conceptid', term='00000000-0000-0000-0000-000000000001'))
    query.delete(index='strings', doc_type='concept')

    try:
        DValueType = apps.get_model("models", "DValueType")
        DValueType.objects.get(valuetype='identifier').delete()
    except Exception:
        # best effort: the value type may not exist; unlike a bare except this no longer
        # swallows KeyboardInterrupt/SystemExit
        pass
def delete(self, user={}, note=""):
    """
    Deletes a single resource and any related indexed data

    Keyword Arguments:
    user -- the user requesting the delete.  When left at its default ({}) the delete is always
            permitted; a resource reviewer may always delete; any other user may only delete a
            resource whose tiles hold no data
    note -- not used by this method (the edit log note is self.displayname)

    Returns True when the resource was deleted, False when the delete was not permitted

    Raises ModelInactiveError when the resource's graph is not active

    NOTE: uses a search engine named ``se`` that is not defined in this method

    """

    permit_deletion = False
    graph = models.GraphModel.objects.get(graphid=self.graph_id)
    if graph.isactive is False:
        message = _("This model is not yet active; unable to delete.")
        raise ModelInactiveError(message)
    if user != {}:
        user_is_reviewer = user_is_resource_reviewer(user)
        if user_is_reviewer is False:
            tiles = list(models.TileModel.objects.filter(resourceinstance=self))
            resource_is_provisional = True if sum([len(t.data) for t in tiles]) == 0 else False
            if resource_is_provisional is True:
                permit_deletion = True
        else:
            permit_deletion = True
    else:
        permit_deletion = True

    if permit_deletion is True:
        related_resources = self.get_related_resources(lang="en-US", start=0, limit=1000, page=0)
        for rr in related_resources["resource_relationships"]:
            # delete any related resource entries, also reindex the resource that references this resource that's being deleted
            try:
                resourceXresource = models.ResourceXResource.objects.get(pk=rr["resourcexid"])
                resource_to_reindex = (
                    resourceXresource.resourceinstanceidfrom_id
                    if resourceXresource.resourceinstanceidto_id == self.resourceinstanceid
                    else resourceXresource.resourceinstanceidto_id
                )
                resourceXresource.delete(deletedResourceId=self.resourceinstanceid)
                res = Resource.objects.get(pk=resource_to_reindex)
                res.load_tiles()
                res.index()
            except ObjectDoesNotExist:
                # the relation only exists in the index, so remove it there
                se.delete(index=RESOURCE_RELATIONS_INDEX, id=rr["resourcexid"])

        query = Query(se)
        bool_query = Bool()
        bool_query.filter(Terms(field="resourceinstanceid", terms=[self.resourceinstanceid]))
        query.add_query(bool_query)
        results = query.search(index=TERMS_INDEX)["hits"]["hits"]
        for result in results:
            se.delete(index=TERMS_INDEX, id=result["_id"])
        se.delete(index=RESOURCES_INDEX, id=self.resourceinstanceid)

        try:
            self.save_edit(edit_type="delete", user=user, note=self.displayname)
        except Exception:
            # best effort: a failure to write the edit log must not block the delete; unlike a
            # bare except this no longer swallows KeyboardInterrupt/SystemExit
            pass
        super(Resource, self).delete()

    return permit_deletion
def build_search_terms_dsl(request):
    """
    Builds (without running) the Query used to look up search terms for the
    lower-cased 'q' request parameter

    A term qualifies when it matches the text as a fuzzy phrase prefix on
    'term' or 'term.folded', or as a fuzzy match on 'term.folded'.  The Query
    starts at 0 and is limited to settings.SEARCH_DROPDOWN_LENGTH results

    Arguments:
    request -- the http request

    """

    engine = SearchEngineFactory().create()
    needle = request.GET.get('q', '').lower()

    suggestions = Bool()
    suggestions.should(Match(field='term', query=needle, type='phrase_prefix', fuzziness='AUTO'))
    suggestions.should(Match(field='term.folded', query=needle, type='phrase_prefix', fuzziness='AUTO'))
    suggestions.should(Match(field='term.folded', query=needle, fuzziness='AUTO'))

    dsl = Query(engine, start=0, limit=settings.SEARCH_DROPDOWN_LENGTH)
    dsl.add_query(suggestions)
    return dsl
def get_relations(resourceinstanceid, start, limit):
    """
    Searches the 'resource_relations' index for relations that reference the
    given resource instance on either side and returns the raw search result

    Arguments:
    resourceinstanceid -- id matched against 'resourceinstanceidfrom' and 'resourceinstanceidto'
    start -- offset handed to the Query
    limit -- page size handed to the Query

    NOTE: uses a search engine named ``se`` that is not defined in this function

    """

    relation_query = Query(se, limit=limit, start=start)

    from_or_to = Bool()
    from_or_to.should(Terms(field='resourceinstanceidfrom', terms=resourceinstanceid))
    from_or_to.should(Terms(field='resourceinstanceidto', terms=resourceinstanceid))
    relation_query.add_query(from_or_to)

    found = relation_query.search(index='resource_relations', doc_type='all')
    return found
def get_related_resources(resourceid, lang, limit=1000, start=0, allowedtypes=[], is_anon=False):
    """
    Returns the relationships of a resource together with the documents of the
    related resources, leaving out resources the caller may not see

    Arguments:
    resourceid -- id matched against 'entityid1' and 'entityid2' of the indexed relations
    lang -- language code handed to get_preflabel_from_valueid when labelling each relationship type

    Keyword Arguments:
    limit -- page size handed to the relations Query
    start -- offset handed to the relations Query
    allowedtypes -- resource types ('_type' of the indexed document) that may be returned; a related
                    resource of any other type is filtered out, so the default (empty) filters all.
                    The default list is never modified here
    is_anon -- when True, resources whose domains contain the protection concept looked up for
               settings.PROTECTION_LEVEL_NODE are filtered out as well

    Returns a dict with the keys 'resource_relationships', 'related_resources'
    and 'total'.  Relationships that touch a filtered out resource are removed
    and 'total' is the number of relationships that remain

    """

    ret = {
        'resource_relationships': [],
        'related_resources': []
    }
    se = SearchEngineFactory().create()

    query = Query(se, limit=limit, start=start)
    query.add_filter(Terms(field='entityid1', terms=resourceid).dsl, operator='or')
    query.add_filter(Terms(field='entityid2', terms=resourceid).dsl, operator='or')
    resource_relations = query.search(index='resource_relations', doc_type="all")

    entityids = set()
    for relation in resource_relations['hits']['hits']:
        source = relation['_source']
        source['preflabel'] = get_preflabel_from_valueid(source['relationshiptype'], lang)
        ret['resource_relationships'].append(source)
        entityids.add(source['entityid1'])
        entityids.add(source['entityid2'])

    # a resource is not listed as related to itself; discard() (unlike remove())
    # does not raise when the id is not among the collected ids
    entityids.discard(resourceid)

    filtered_ids = set()
    related_resources = None
    # only ask the index for documents when there is at least one id to fetch
    if entityids:
        # passing the allowed types as the doc_type param did not work for the original
        # author, so the type filter is carried out on the results below
        related_resources = se.search(index='entity', doc_type='_all', id=list(entityids))

    if related_resources:
        protect_id = None
        if is_anon:
            # filter out protected resources if user is anonymous
            # (this is basically a subset of get_protected_entityids, they should
            # probably be combined).  Looked up once instead of once per resource
            from search import get_protection_conceptids
            protect_id = get_protection_conceptids(settings.PROTECTION_LEVEL_NODE)

        for resource in related_resources['docs']:
            doc = resource['_source']
            if resource['_type'] not in allowedtypes:
                filtered_ids.add(doc['entityid'])
                continue
            if is_anon:
                conceptids = [d['conceptid'] for d in doc['domains']]
                if protect_id in conceptids:
                    filtered_ids.add(doc['entityid'])
                    continue
            ret['related_resources'].append(doc)

    if filtered_ids:
        # drop every relationship that has a filtered out resource on either side
        ret['resource_relationships'] = [
            rel for rel in ret['resource_relationships']
            if rel['entityid1'] not in filtered_ids and rel['entityid2'] not in filtered_ids
        ]

    ret['total'] = len(ret['resource_relationships'])

    return ret
def index_resource_relations(clear_index=True, batch_size=settings.BULK_IMPORT_BATCH_SIZE):
    """
    Indexes all resource to resource relation records

    Keyword Arguments:
    clear_index -- set to True to remove all the resources from the index before the reindexing operation
    batch_size -- the number of records to index as a group, the larger the number the more memory required

    Prints a status line that compares the number of rows read from the
    database with the number of documents counted in the index afterwards

    NOTE: uses a search engine named ``se`` that is not defined in this function

    """

    start = datetime.now()
    print("Indexing resource to resource relations")

    # the selected columns, in order; each row is indexed as a document keyed by these names
    columns = (
        "resourcexid",
        "notes",
        "datestarted",
        "dateended",
        "relationshiptype",
        "resourceinstanceidfrom",
        "resourceinstancefrom_graphid",
        "resourceinstanceidto",
        "resourceinstanceto_graphid",
        "modified",
        "created",
        "inverserelationshiptype",
        "tileid",
        "nodeid",
    )
    sql = "SELECT {0} FROM public.resource_x_resource".format(", ".join(columns))

    if clear_index:
        q = Query(se=se)
        q.delete(index=RESOURCE_RELATIONS_INDEX)

    # the cursor is closed again when the block is left
    with connection.cursor() as cursor:
        with se.BulkIndexer(batch_size=batch_size, refresh=True) as resource_relations_indexer:
            cursor.execute(sql)
            for resource_relation in cursor.fetchall():
                doc = dict(zip(columns, resource_relation))
                resource_relations_indexer.add(index=RESOURCE_RELATIONS_INDEX, id=doc["resourcexid"], data=doc)
        row_count = cursor.rowcount

    # counted after the bulk indexer block has been left
    index_count = se.count(index=RESOURCE_RELATIONS_INDEX)
    print(
        "Status: {0}, In Database: {1}, Indexed: {2}, Took: {3} seconds".format(
            "Passed" if row_count == index_count else "Failed", row_count, index_count, (datetime.now() - start).seconds
        )
    )
def map_layers(request, entitytypeid='all', get_centroids=False):
    """
    Returns the features of the 'maplayers' index as a GeoJSON FeatureCollection

    Arguments:
    request -- the http request.  Parameters read: 'geom' (name of the property to use as the
               geometry), 'limit' (defaults to settings.MAP_LAYER_FEATURE_LIMIT) and 'entityid'
               (comma separated ids; when given, exactly those documents are returned unmodified)

    Keyword Arguments:
    entitytypeid -- restricts the search to this doc_type unless it is 'all'
    get_centroids -- when True each feature's geometry is its centroid and most of its
                     properties are removed

    Users named 'anonymous' only get features whose 'ewstatus' property equals
    settings.PUBLISHED_LABEL.  An empty JSON object is returned when the
    search yields nothing

    """

    data = []
    geom_param = request.GET.get('geom', None)
    bbox = request.GET.get('bbox', '')  # read but currently unused
    limit = request.GET.get('limit', settings.MAP_LAYER_FEATURE_LIMIT)
    entityids = request.GET.get('entityid', '')
    geojson_collection = {"type": "FeatureCollection", "features": []}
    features = geojson_collection['features']

    se = SearchEngineFactory().create()
    query = Query(se, limit=limit)

    args = {'index': 'maplayers'}
    if entitytypeid != 'all':
        args['doc_type'] = entitytypeid

    if entityids != '':
        for entityid in entityids.split(','):
            features.append(se.search(index='maplayers', id=entityid)['_source'])
        return JSONResponse(geojson_collection)

    data = query.search(**args)
    if not data:
        return JSONResponse({})

    # properties that are stripped from a feature when only centroids are requested
    centroid_mode_drops = (
        'extent', 'elements', 'entitytypeid', 'constructions', 'centroid',
        'ewstatus', 'address', 'designations', 'primaryname', 'resource_type',
    )

    for item in data['hits']['hits']:
        feature = item['_source']
        # If the user is not authenticated, only the published ones are shown (this probably
        # needs more work (maybe they should only see their own???)!!!)
        if request.user.username == 'anonymous':
            if feature['properties']['ewstatus'] != settings.PUBLISHED_LABEL:
                continue

        properties = feature['properties']
        if get_centroids:
            feature['geometry'] = properties['centroid']
            for key in centroid_mode_drops:
                properties.pop(key, None)
        elif geom_param is not None:
            feature['geometry'] = properties[geom_param]
            properties.pop('extent', None)
            properties.pop(geom_param, None)
        else:
            properties.pop('extent', None)
            properties.pop('centroid', None)

        features.append(feature)

    return JSONResponse(geojson_collection)
def map_layers(request, entitytypeid='all', get_centroids=False):
    """
    Returns the features of the 'maplayers' index as a GeoJSON FeatureCollection

    Arguments:
    request -- the http request.  Parameters read: 'geom' (name of the property to use as the
               geometry), 'limit' (defaults to settings.MAP_LAYER_FEATURE_LIMIT) and 'entityid'
               (comma separated ids; when given, exactly those documents are returned unmodified)

    Keyword Arguments:
    entitytypeid -- restricts the search to this doc_type unless it is 'all'
    get_centroids -- when True each feature's geometry is its centroid and most of its
                     properties are removed

    Users named 'anonymous' only get features whose 'ewstatus' property equals
    settings.PUBLISHED_LABEL.  An empty JSON object is returned when the
    search yields nothing

    """

    data = []
    geom_param = request.GET.get('geom', None)
    bbox = request.GET.get('bbox', '')  # read but currently unused
    limit = request.GET.get('limit', settings.MAP_LAYER_FEATURE_LIMIT)
    entityids = request.GET.get('entityid', '')
    geojson_collection = {
        "type": "FeatureCollection",
        "features": []
    }

    se = SearchEngineFactory().create()
    query = Query(se, limit=limit)

    args = {'index': 'maplayers'}
    if entitytypeid != 'all':
        args['doc_type'] = entitytypeid

    if entityids != '':
        # explicit ids requested: return exactly those documents
        requested = [se.search(index='maplayers', id=entityid)['_source'] for entityid in entityids.split(',')]
        geojson_collection['features'].extend(requested)
        return JSONResponse(geojson_collection)

    data = query.search(**args)
    if not data:
        return JSONResponse({})

    for item in data['hits']['hits']:
        feature = item['_source']
        # If the user is not authenticated, only the published ones are shown (this probably
        # needs more work (maybe they should only see their own???)!!!)
        if request.user.username == 'anonymous':
            if feature['properties']['ewstatus'] != settings.PUBLISHED_LABEL:
                continue

        properties = feature['properties']
        if get_centroids:
            feature['geometry'] = properties['centroid']
            # only the geometry is of interest, strip everything descriptive
            obsolete = ['extent', 'elements', 'entitytypeid', 'constructions', 'centroid',
                        'ewstatus', 'address', 'designations', 'primaryname', 'resource_type']
        elif geom_param is not None:
            feature['geometry'] = properties[geom_param]
            obsolete = ['extent', geom_param]
        else:
            obsolete = ['extent', 'centroid']

        for key in obsolete:
            properties.pop(key, None)

        geojson_collection['features'].append(feature)

    return JSONResponse(geojson_collection)
def map_layers(request, entitytypeid='all', get_centroids=False):
    """
    Returns the features of the 'maplayers' index as a GeoJSON FeatureCollection

    Arguments:
    request -- the http request.  Parameters read: 'geom' (name of the property to use as the
               geometry), 'limit' (defaults to settings.MAP_LAYER_FEATURE_LIMIT) and 'entityid'
               (comma separated ids; when given, exactly those documents are returned unmodified)

    Keyword Arguments:
    entitytypeid -- restricts the search to this doc_type unless it is 'all'
    get_centroids -- when True only a Point feature (centroid coordinates and id) is returned
                     per document

    """

    data = []
    geom_param = request.GET.get('geom', None)
    bbox = request.GET.get('bbox', '')  # read but currently unused
    limit = request.GET.get('limit', settings.MAP_LAYER_FEATURE_LIMIT)
    entityids = request.GET.get('entityid', '')
    geojson_collection = {"type": "FeatureCollection", "features": []}

    se = SearchEngineFactory().create()
    query = Query(se, limit=limit)

    args = {'index': 'maplayers'}
    if entitytypeid != 'all':
        args['doc_type'] = entitytypeid

    if entityids != '':
        for entityid in entityids.split(','):
            document = se.search(index='maplayers', id=entityid)
            geojson_collection['features'].append(document['_source'])
        return JSONResponse(geojson_collection)

    if not get_centroids:
        # We need the full data for each record
        data = query.search(**args)
        for item in data['hits']['hits']:
            feature = item['_source']
            properties = feature['properties']
            if geom_param is not None:
                feature['geometry'] = properties[geom_param]
                properties.pop('extent', None)
                properties.pop(geom_param, None)
            else:
                properties.pop('extent', None)
                properties.pop('centroid', None)
            geojson_collection['features'].append(feature)
        return JSONResponse(geojson_collection)

    # If we are just fetching the centroids, we can do a slightly optimised query by
    # having elasticsearch pull out relevant fields
    args['fields'] = ['properties.centroid.coordinates', 'type', '_source.id']
    data = query.search(**args)
    centroid_features = []
    for item in data['hits']['hits']:
        centroid_features.append({
            "geometry": {
                "type": "Point",
                "coordinates": item['fields']['properties.centroid.coordinates']
            },
            "type": "Feature",
            "id": item['_id']
        })
    geojson_collection['features'] = centroid_features

    return JSONResponse(geojson_collection)
def map_layers(request, entitytypeid='all', get_centroids=False):
    """
    Builds a GeoJSON FeatureCollection from the 'maplayers' index, hiding
    protected entities from the anonymous user

    Arguments:
    request -- the current request; its 'geom', 'limit' and 'entityid' GET
        parameters and the requesting user's username are read

    Keyword Arguments:
    entitytypeid -- used as the search doc_type unless it is 'all'
    get_centroids -- when truthy, each feature's geometry becomes its 'centroid'
        property and its 'properties' member is removed

    Return:
    a JSONResponse wrapping the feature collection

    """

    geom_param = request.GET.get('geom', None)
    limit = request.GET.get('limit', settings.MAP_LAYER_FEATURE_LIMIT)
    entityids = request.GET.get('entityid', '')
    geojson_collection = {
        "type": "FeatureCollection",
        "features": []
    }

    se = SearchEngineFactory().create()
    query = Query(se, limit=limit)

    args = {'index': 'maplayers'}
    if entitytypeid != 'all':
        args['doc_type'] = entitytypeid

    if entityids != '':
        # explicitly requested ids are returned without the protection filter below
        for entityid in entityids.split(','):
            geojson_collection['features'].append(se.search(index='maplayers', id=entityid)['_source'])
        return JSONResponse(geojson_collection)

    data = query.search(**args)

    # for the anonymous user, collect the protected entity ids to be left off
    # the map; kept in a set so the per-hit membership test stays cheap
    protected = frozenset()
    if request.user.username == 'anonymous':
        protected = frozenset(get_protected_entityids())

    for item in data['hits']['hits']:
        if item['_id'] in protected:
            continue

        feature = item['_source']
        if get_centroids:
            feature['geometry'] = feature['properties']['centroid']
            feature.pop('properties', None)
        elif geom_param is not None:
            feature['geometry'] = feature['properties'][geom_param]
            feature['properties'].pop('extent', None)
            feature['properties'].pop(geom_param, None)
        else:
            feature['properties'].pop('extent', None)
            feature['properties'].pop('centroid', None)

        geojson_collection['features'].append(feature)

    return JSONResponse(geojson_collection)
def delete(self, user={}, note=''):
    """
    Removes this resource along with its relationship records and indexed data

    Deletion is allowed when no user is given, when the user belongs to the
    'Resource Reviewer' group, or when the tiles of the resource hold no data.

    Keyword Arguments:
    user -- the user requesting the deletion; the default ({}) means "no user"
    note -- not referenced by this method

    Return:
    True if the resource was deleted, False if deletion was not permitted

    Raises:
    ModelInactiveError -- if the graph of this resource is not active

    """

    graph = models.GraphModel.objects.get(graphid=self.graph_id)
    if graph.isactive is False:
        raise ModelInactiveError(_('This model is not yet active; unable to delete.'))

    if user == {}:
        permit_deletion = True
    elif user.groups.filter(name='Resource Reviewer').exists():
        permit_deletion = True
    else:
        # a non-reviewer may only delete a resource whose tiles carry no data
        tiles = models.TileModel.objects.filter(resourceinstance=self)
        permit_deletion = sum(len(tile.data) for tile in tiles) == 0

    if not permit_deletion:
        return permit_deletion

    se = SearchEngineFactory().create()

    relations = self.get_related_resources(lang="en-US", start=0, limit=1000, page=0)
    for relation in relations['resource_relationships']:
        models.ResourceXResource.objects.get(pk=relation['resourcexid']).delete()

    # remove every term document that was indexed for this resource
    term_filter = Bool()
    term_filter.filter(Terms(field='resourceinstanceid', terms=[self.resourceinstanceid]))
    term_query = Query(se)
    term_query.add_query(term_filter)
    for hit in term_query.search(index='terms')['hits']['hits']:
        se.delete(index='terms', id=hit['_id'])

    se.delete(index='resources', id=self.resourceinstanceid)

    # the edit log entry stores the display name as its note
    self.save_edit(edit_type='delete', user=user, note=self.displayname)
    super(Resource, self).delete()

    return permit_deletion
def get_related_resources(resourceid, lang='en-US', limit=1000, start=0):
    """
    Returns the relationship records whose 'entityid1' matches resourceid

    Arguments:
    resourceid -- the id (or ids) handed to the Terms filter on 'entityid1'

    Keyword Arguments:
    lang -- not referenced by this function
    limit -- maximum number of relationship records to fetch
    start -- offset of the first relationship record to fetch

    Return:
    a dict with 'resource_relationships' (the matching records),
    'related_resources' (always empty here) and 'total' (the hit count
    reported by the search)

    """

    se = SearchEngineFactory().create()

    query = Query(se, limit=limit, start=start)
    entity_filter = Terms(field='entityid1', terms=resourceid)
    query.add_filter(entity_filter.dsl, operator='or')

    hits = query.search(index='resource_relations', doc_type='all')['hits']

    ret = {'resource_relationships': [], 'related_resources': []}
    ret['total'] = hits['total']
    ret['resource_relationships'].extend(hit['_source'] for hit in hits['hits'])
    return ret
def clear_resources():
    """
    Empties the 'terms', 'resources' and 'resource_relations' indexes, deletes
    every resource instance except the one identified by
    settings.RESOURCE_INSTANCE_ID, and truncates the resource_x_resource table.
    Progress counts are printed along the way.

    """

    def removable_resources():
        # a fresh queryset per call, so each count reflects the table as it is now
        return Resource.objects.exclude(resourceinstanceid=settings.RESOURCE_INSTANCE_ID)

    se = SearchEngineFactory().create()
    match_all_query = Query(se)
    for index_name in ("terms", "resources", "resource_relations"):
        match_all_query.delete(index=index_name)

    print("deleting", removable_resources().count(), "resources")
    removable_resources().delete()
    print(removable_resources().count(), "resources remaining")

    relationships = models.ResourceXResource.objects
    print("deleting", relationships.count(), "resource relationships")
    cursor = connection.cursor()
    cursor.execute("TRUNCATE public.resource_x_resource CASCADE;")
    print(relationships.count(), "resource relationships remaining")
def get_related_resources(resourceid, lang='en-US', limit=1000, start=0):
    """
    Looks up the relationship records that have resourceid as 'entityid1'

    Arguments:
    resourceid -- the value passed as the terms of the 'entityid1' filter

    Keyword Arguments:
    lang -- not referenced by this function
    limit -- page size handed to the query
    start -- page offset handed to the query

    Return:
    a dict holding the matching records under 'resource_relationships', an
    empty 'related_resources' list, and the search's hit count under 'total'

    """

    search_engine = SearchEngineFactory().create()
    relations_query = Query(search_engine, limit=limit, start=start)
    relations_query.add_filter(
        Terms(field='entityid1', terms=resourceid).dsl,
        operator='or')

    response = relations_query.search(index='resource_relations', doc_type='all')
    total = response['hits']['total']
    relationships = [hit['_source'] for hit in response['hits']['hits']]

    return {
        'resource_relationships': relationships,
        'related_resources': [],
        'total': total
    }
def get_search_range_contexts(request):
    """
    Returns the concept, context and value ids for settings.RANGE_TERMS,
    grouped by context key

    Each configured term is searched in the 'term' index; the hit whose context
    label and term text both equal the configured ones supplies the ids. Terms
    without such a hit get empty strings. The finished lookup is cached under
    'search_range_contexts' for 86400 seconds and served from the cache on
    later calls.

    Arguments:
    request -- the current request; 'lang' is read from its GET parameters and
        defaults to request.LANGUAGE_CODE

    Return:
    a dict of the form {context_key: {text_key: {'conceptid', 'context', 'valueid'}}}

    """

    cached = cache.get('search_range_contexts')
    if cached is not None:
        return cached

    lang = request.GET.get('lang', request.LANGUAGE_CODE)
    se = SearchEngineFactory().create()
    search_range_context = {}
    previous_context_label = '-'
    value = {}

    for search_term in settings.RANGE_TERMS:
        search_string = search_term['text'].lower()

        query = Query(se, start=0, limit=settings.SEARCH_DROPDOWN_LENGTH)
        boolquery = Bool()
        boolquery.should(Match(field='term', query=search_string, type='phrase_prefix', fuzziness='AUTO'))
        boolquery.should(Match(field='term.folded', query=search_string, type='phrase_prefix', fuzziness='AUTO'))
        boolquery.should(Match(field='term.folded', query=search_string, fuzziness='AUTO'))
        query.add_query(boolquery)
        results = query.search(index='term', doc_type='value')

        # reset all three ids for every term, so a term without a matching hit
        # neither fails on an unbound name nor inherits the previous term's id
        conceptid = ''
        context = ''
        valueid = ''
        for hit in results['hits']['hits']:
            source = hit['_source']
            pref_label = get_preflabel_from_conceptid(source['context'], lang)
            source['options']['context_label'] = pref_label['value']
            if pref_label['value'] == search_term['context_label'] and source['term'] == search_term['text']:
                conceptid = source['options']['conceptid']
                context = source['context']
                valueid = source['ids'][0]

        # consecutive terms that share a context label are collected in one dict
        if previous_context_label != search_term['context_label']:
            value = {}
        value[search_term['text_key']] = {'conceptid': conceptid, 'context': context, 'valueid': valueid}
        search_range_context[search_term['context_key']] = value
        previous_context_label = search_term['context_label']

    cache.set('search_range_contexts', search_range_context, 86400)
    return search_range_context
def search_results(request):
    """
    Runs the search described by the request's GET parameters against the
    'resources' index

    Every GET parameter name, plus the implicit "search-results" filter, is
    resolved to a search filter that first contributes to the query and, after
    the search, may post-process the results.

    Arguments:
    request -- the current request

    Return:
    a JSONResponse with the results and their metadata, or a JSONResponse with
    status 500 if a filter raised while building the query or the search
    returned None

    """

    se = SearchEngineFactory().create()
    search_results_object = {"query": Query(se)}
    include_provisional = get_provisional_type(request)
    permitted_nodegroups = get_permitted_nodegroups(request.user)
    search_filter_factory = SearchFilterFactory(request)

    def requested_filters():
        # yields the filter for each GET parameter that maps to one, followed
        # by the always-present "search-results" filter
        for filter_type, _querystring in list(request.GET.items()) + [("search-results", "")]:
            search_filter = search_filter_factory.get_filter(filter_type)
            if search_filter:
                yield search_filter

    try:
        for search_filter in requested_filters():
            search_filter.append_dsl(search_results_object, permitted_nodegroups, include_provisional)
    except Exception as err:
        return JSONResponse(err, status=500)

    dsl = search_results_object.pop("query", None)
    for field_name in (
        "graph_id",
        "root_ontology_class",
        "resourceinstanceid",
        "points",
        "geometries",
        "displayname",
        "displaydescription",
        "map_popup",
        "provisional_resource",
    ):
        dsl.include(field_name)
    if request.GET.get("tiles", None) is not None:
        dsl.include("tiles")

    results = dsl.search(index="resources")
    if results is None:
        return JSONResponse({"message": _("There was an error retrieving the search results")}, status=500)

    # give the filters a chance to modify the results
    for search_filter in requested_filters():
        search_filter.post_search_hook(search_results_object, results, permitted_nodegroups)

    ret = {"results": results}
    ret.update(search_results_object)
    ret["reviewer"] = request.user.groups.filter(name="Resource Reviewer").exists()
    ret["timestamp"] = datetime.now()
    ret["total_results"] = dsl.count(index="resources")
    return JSONResponse(ret)
def index_resources(self, resources=None, batch_size=settings.BULK_IMPORT_BATCH_SIZE, quiet=False):
    """
    Indexes a list of resources in bulk to Elastic Search

    Keyword Arguments:
    resources -- the list of resource instances to index; None is treated as
        an empty list
    batch_size -- the number of records to index as a group, the larger the
        number the more memory required
    quiet -- silences the status bar output during certain operations, use in
        celery operations for example

    Return: None

    """

    start = datetime.now()
    if resources is None:
        resources = []

    q = Query(se=self.se)
    self.se.refresh(index=self.index_name)
    count_before = self.se.count(index=self.index_name, body=q.dsl)
    result_summary = {"database": len(resources), "indexed": 0}

    # a progress bar is only shown for more than one resource and when not quiet
    bar = None
    if quiet is False and len(resources) > 1:
        bar = pyprind.ProgBar(len(resources), bar_char="█")

    with self.se.BulkIndexer(batch_size=batch_size, refresh=True) as indexer:
        for resource in resources:
            if bar is not None:
                bar.update(item_id=resource)
            tiles = list(models.TileModel.objects.filter(resourceinstance=resource))
            document, doc_id = self.get_documents_to_index(resource, tiles)
            # skip resources without a document or id (the id test previously
            # checked the builtin `id`, which is never None)
            if document is not None and doc_id is not None:
                indexer.add(index=self.index_name, id=doc_id, data=document)

    self.se.refresh(index=self.index_name)
    result_summary["indexed"] = self.se.count(index=self.index_name, body=q.dsl) - count_before
    status = "Passed" if result_summary["database"] == result_summary["indexed"] else "Failed"
    print(f"Custom Index - {settings.ELASTICSEARCH_PREFIX}_{self.index_name}")
    print(
        f" Status: {status}, In Database: {result_summary['database']}, Indexed: {result_summary['indexed']}, Took: {(datetime.now() - start).seconds} seconds"
    )
def get_search_contexts(request):
    """
    Returns the concept and context ids for settings.SEARCH_TERMS, grouped by
    context key

    Each configured term is searched in the 'term' index; the hit whose context
    label and term text both equal the configured ones supplies the ids. Terms
    without such a hit get empty strings. The finished lookup is cached under
    'search_contexts' for 86400 seconds and served from the cache on later
    calls.

    Arguments:
    request -- the current request; 'lang' is read from its GET parameters and
        defaults to settings.LANGUAGE_CODE

    Return:
    a dict of the form {context_key: {text_key: {'conceptid', 'context'}}}

    """

    cached = cache.get('search_contexts')
    if cached is not None:
        return cached

    lang = request.GET.get('lang', settings.LANGUAGE_CODE)
    se = SearchEngineFactory().create()
    search_context = {}
    previous_context_label = '-'
    value = {}

    for search_term in settings.SEARCH_TERMS:
        search_string = search_term['text'].lower()

        query = Query(se, start=0, limit=settings.SEARCH_DROPDOWN_LENGTH)
        boolquery = Bool()
        boolquery.should(Match(field='term', query=search_string, type='phrase_prefix', fuzziness='AUTO'))
        boolquery.should(Match(field='term.folded', query=search_string, type='phrase_prefix', fuzziness='AUTO'))
        boolquery.should(Match(field='term.folded', query=search_string, fuzziness='AUTO'))
        query.add_query(boolquery)
        results = query.search(index='term', doc_type='value')

        conceptid = ''
        context = ''
        for hit in results['hits']['hits']:
            source = hit['_source']
            pref_label = get_preflabel_from_conceptid(source['context'], lang)
            source['options']['context_label'] = pref_label['value']
            if pref_label['value'] == search_term['context_label'] and source['term'] == search_term['text']:
                conceptid = source['options']['conceptid']
                context = source['context']

        # consecutive terms that share a context label are collected in one dict
        if previous_context_label != search_term['context_label']:
            value = {}
        value[search_term['text_key']] = {'conceptid': conceptid, 'context': context}
        search_context[search_term['context_key']] = value
        previous_context_label = search_term['context_label']

    cache.set('search_contexts', search_context, 86400)
    return search_context
def delete(self, *args, **kwargs):
    """
    Deletes this tile and its child tiles, or records a provisional delete

    Reviewers, callers without a user, and users that own the provisional
    edit delete the tile outright; for anyone else the deletion is stored as
    a provisional edit and the tile is saved instead.

    Keyword Arguments (consumed here, not forwarded to the model's delete):
    request -- the current request, used to find the acting user
    index -- set to False to skip the search index updates (default True)
    transaction_id -- passed to save_edit for the edit log entry
    provisional_edit_log_details -- passed to save_edit for the edit log entry

    """

    se = SearchEngineFactory().create()
    request = kwargs.pop("request", None)
    index = kwargs.pop("index", True)
    # previously read from the already-popped "index" key, so it was always
    # None and a supplied transaction_id leaked into the model's delete()
    transaction_id = kwargs.pop("transaction_id", None)
    provisional_edit_log_details = kwargs.pop("provisional_edit_log_details", None)

    for tile in self.tiles:
        tile.delete(*args, request=request, **kwargs)

    try:
        user = request.user
        user_is_reviewer = user_is_resource_reviewer(user)
    except AttributeError:  # no user
        user = None
        user_is_reviewer = True

    if user_is_reviewer is True or self.user_owns_provisional(user):
        if index:
            # remove the terms that were indexed for this tile
            query = Query(se)
            bool_query = Bool()
            bool_query.filter(Terms(field="tileid", terms=[self.tileid]))
            query.add_query(bool_query)
            query.delete(index=TERMS_INDEX)

        self.__preDelete(request)
        self.save_edit(
            user=user,
            edit_type="tile delete",
            old_value=self.data,
            provisional_edit_log_details=provisional_edit_log_details,
            transaction_id=transaction_id,
        )
        try:
            super(Tile, self).delete(*args, **kwargs)
            for nodeid in self.data.keys():
                node = models.Node.objects.get(nodeid=nodeid)
                datatype = self.datatype_factory.get_instance(node.datatype)
                datatype.post_tile_delete(self, nodeid, index=index)
            if index:
                self.index()
        except IntegrityError as e:
            logger.error(e)
    else:
        self.apply_provisional_edit(user, data={}, action="delete")
        super(Tile, self).save(*args, **kwargs)
def get_resource_bounds(node):
    """
    Returns the geo bounds of the 'points.point' field over the resources whose
    graph_id is the node's graph id, or None when the aggregation result
    carries no 'bounds' entry

    """

    bounds_query = Query(se, start=0, limit=0)
    bounds_query.add_query(Bool())
    bounds_query.add_aggregation(GeoBoundsAgg(field="points.point", name="bounds"))

    graph_term = Term(field="graph_id", term=str(node.graph.graphid))
    bounds_query.add_query(graph_term)

    response = bounds_query.search(index=RESOURCES_INDEX)
    return response["aggregations"]["bounds"].get("bounds")
def get_resource_bounds(node):
    """
    Aggregates the geo bounds of 'points.point' for the resources that belong
    to the node's graph

    Return:
    the 'bounds' member of the aggregation result, or None if it is absent

    """

    query = Query(se, start=0, limit=0)
    query.add_query(Bool())
    query.add_aggregation(GeoBoundsAgg(field='points.point', name='bounds'))
    query.add_query(Term(field='graph_id', term=str(node.graph.graphid)))

    bounds_agg = query.search(index='resources')['aggregations']['bounds']
    if 'bounds' not in bounds_agg:
        return None
    return bounds_agg['bounds']
def index_resources(clear_index=True, batch_size=settings.BULK_IMPORT_BATCH_SIZE):
    """
    Reindexes every resource in the database, one resource model at a time

    The resource model identified by settings.SYSTEM_SETTINGS_RESOURCE_MODEL_ID
    is left out.

    Keyword Arguments:
    clear_index -- set to True to empty the 'terms' index first; also passed
        on to index_resources_by_type
    batch_size -- the number of records to index as a group, the larger the
        number the more memory required

    """

    search_engine = SearchEngineFactory().create()
    if clear_index:
        Query(se=search_engine).delete(index='terms')

    resource_models = models.GraphModel.objects.filter(isresource=True)
    resource_models = resource_models.exclude(graphid=settings.SYSTEM_SETTINGS_RESOURCE_MODEL_ID)
    graph_ids = resource_models.values_list('graphid', flat=True)

    index_resources_by_type(graph_ids, clear_index=clear_index, batch_size=batch_size)
def index_resources(clear_index=True, batch_size=settings.BULK_IMPORT_BATCH_SIZE):
    """
    Indexes the resources of every resource model except the one identified by
    settings.SYSTEM_SETTINGS_RESOURCE_MODEL_ID

    Keyword Arguments:
    clear_index -- set to True to delete the 'term' documents of the 'strings'
        index before reindexing; also passed on to index_resources_by_type
    batch_size -- the number of records to index as a group, the larger the
        number the more memory required

    """

    engine = SearchEngineFactory().create()

    if clear_index:
        term_query = Query(se=engine)
        term_query.delete(index='strings', doc_type='term')

    graph_ids = (
        models.GraphModel.objects
        .filter(isresource=True)
        .exclude(graphid=settings.SYSTEM_SETTINGS_RESOURCE_MODEL_ID)
        .values_list('graphid', flat=True)
    )
    index_resources_by_type(graph_ids, clear_index=clear_index, batch_size=batch_size)
def get_indexed_concepts(se, conceptid, concept_value):
    """
    Searches the 'concept_labels' index for a conceptid and checks the first
    hit against the database values

    Arguments:
    se -- the search engine instance to query
    conceptid -- the concept id to look for (phrase-prefix match)
    concept_value -- the concept value as stored in the database

    Return:
    'passed' when the first hit has the same conceptid or the same value,
    'failed: concept value does not match' when it has neither, and
    'failed: cannot find <conceptid>' when there is no hit at all

    """

    query = Query(se, start=0, limit=100)
    query.add_query(Match(field='conceptid', query=conceptid, type='phrase_prefix'))
    hits = query.search(index='concept_labels')['hits']['hits']

    if not hits:
        # the separating space was missing, which ran the id into the message
        return 'failed: cannot find ' + conceptid

    source = hits[0]['_source']
    if conceptid == source['conceptid'] or concept_value == source['value']:
        return 'passed'
    return 'failed: concept value does not match'
def delete(self):
    """
    Deletes a single resource and any related indexed data

    The resource's relationship records, its 'term' documents in the 'strings'
    index and its document in the 'resource' index are removed before the
    database record itself.

    """

    se = SearchEngineFactory().create()

    # fetch up to 1000 relationships (previously only 15, which left any
    # further relationship records of the resource undeleted here)
    related_resources = self.get_related_resources(lang="en-US", start=0, limit=1000)
    for rr in related_resources['resource_relationships']:
        models.ResourceXResource.objects.get(pk=rr['resourcexid']).delete()

    query = Query(se)
    bool_query = Bool()
    bool_query.filter(Terms(field='resourceinstanceid', terms=[self.resourceinstanceid]))
    query.add_query(bool_query)
    results = query.search(index='strings', doc_type='term')['hits']['hits']
    for result in results:
        se.delete(index='strings', doc_type='term', id=result['_id'])

    se.delete(index='resource', doc_type=str(self.graph_id), id=self.resourceinstanceid)

    super(Resource, self).delete()
def delete(self, *args, **kwargs):
    """
    Deletes this tile, its child tiles and its indexed terms, then reindexes
    the resource the tile belongs to

    Keyword Arguments:
    request -- the current request; consumed here, forwarded to the child
        tiles and handed to the pre-delete hook

    """

    se = SearchEngineFactory().create()
    request = kwargs.pop('request', None)

    # self.tiles is iterated as a mapping whose values are collections of child tiles;
    # values() (rather than itervalues()) works on both Python 2 and Python 3
    for tiles in self.tiles.values():
        for tile in tiles:
            tile.delete(*args, request=request, **kwargs)

    query = Query(se)
    bool_query = Bool()
    bool_query.filter(Terms(field='tileid', terms=[self.tileid]))
    query.add_query(bool_query)
    results = query.search(index='strings', doc_type='term')['hits']['hits']
    for result in results:
        se.delete(index='strings', doc_type='term', id=result['_id'])

    self.__preDelete(request)
    super(Tile, self).delete(*args, **kwargs)

    resource = Resource.objects.get(resourceinstanceid=self.resourceinstance.resourceinstanceid)
    resource.index()
def get_related_resource_ids(resourceids, lang, limit=1000, start=0):
    """
    Collects the ids found on the other side of every relationship that
    involves one of the given resource ids

    Arguments:
    resourceids -- the ids to find relationships for
    lang -- not referenced by this function

    Keyword Arguments:
    limit -- maximum number of relationship records to fetch
    start -- offset of the first relationship record to fetch

    Return:
    a set of entity ids

    """

    se = SearchEngineFactory().create()
    query = Query(se, limit=limit, start=start)
    for field_name in ('entityid1', 'entityid2'):
        query.add_filter(Terms(field=field_name, terms=resourceids).dsl, operator='or')

    response = query.search(index='resource_relations', doc_type='all')

    related_ids = set()
    for hit in response['hits']['hits']:
        relation = hit['_source']
        first_id = relation['entityid1']
        second_id = relation['entityid2']
        # whenever one end of the relation is among the requested ids, keep the other end
        if first_id in resourceids:
            related_ids.add(second_id)
        if second_id in resourceids:
            related_ids.add(first_id)
    return related_ids
def index_resources_by_type(resource_types, clear_index=True, batch_size=settings.BULK_IMPORT_BATCH_SIZE):
    """
    Indexes all resources of a given type(s)

    Arguments:
    resource_types -- array of graph ids that represent resource types

    Keyword Arguments:
    clear_index -- set to True to remove all the resources of the types passed
        in from the index before the reindexing operation
    batch_size -- the number of records to index as a group, the larger the
        number the more memory required

    A status line comparing the database count with the indexed count is
    printed for every resource type.

    """

    se = SearchEngineFactory().create()
    datatype_factory = DataTypeFactory()
    node_datatypes = {str(nodeid): datatype for nodeid, datatype in models.Node.objects.values_list('nodeid', 'datatype')}

    for resource_type in resource_types:
        start = datetime.now()
        graph_id = str(resource_type)
        resources = Resource.objects.filter(graph_id=graph_id)
        graph_name = models.GraphModel.objects.get(graphid=graph_id).name
        print("Indexing resource type '{0}'".format(graph_name))
        result_summary = {'database': len(resources), 'indexed': 0}

        if clear_index:
            q = Query(se=se)
            q.delete(index='resource', doc_type=graph_id)

        # documents and their terms are sent through separate bulk indexers;
        # the count below is taken after both have exited
        with se.BulkIndexer(batch_size=batch_size, refresh=True) as doc_indexer, \
                se.BulkIndexer(batch_size=batch_size, refresh=True) as term_indexer:
            for resource in resources:
                document, terms = resource.get_documents_to_index(
                    fetchTiles=True, datatype_factory=datatype_factory, node_datatypes=node_datatypes)
                doc_indexer.add(index='resource', doc_type=document['graph_id'], id=document['resourceinstanceid'], data=document)
                for term in terms:
                    term_indexer.add(index='strings', doc_type='term', id=term['_id'], data=term['_source'])

        result_summary['indexed'] = se.count(index='resource', doc_type=graph_id)
        status = 'Passed' if result_summary['database'] == result_summary['indexed'] else 'Failed'
        print("Status: {0}, Resource Type: {1}, In Database: {2}, Indexed: {3}, Took: {4} seconds".format(status, graph_name, result_summary['database'], result_summary['indexed'], (datetime.now()-start).seconds))
def map_layers(request, entitytypeid='all', get_centroids=False):
    """
    Returns the documents of the 'maplayers' index as a GeoJSON FeatureCollection

    Arguments:
    request -- the current request; its 'geom', 'limit' and 'entityid' GET
        parameters are read

    Keyword Arguments:
    entitytypeid -- used as the search doc_type unless it is 'all'
    get_centroids -- when truthy, each feature's geometry becomes its 'centroid'
        property and its 'properties' member is removed

    Return:
    a JSONResponse wrapping the feature collection

    """

    params = request.GET
    geom_param = params.get('geom', None)
    limit = params.get('limit', settings.MAP_LAYER_FEATURE_LIMIT)
    entityids = params.get('entityid', '')

    features = []
    geojson_collection = {
        "type": "FeatureCollection",
        "features": features
    }

    se = SearchEngineFactory().create()
    query = Query(se, limit=limit)

    args = {'index': 'maplayers'}
    if entitytypeid != 'all':
        args['doc_type'] = entitytypeid

    if entityids != '':
        # explicit ids are looked up individually instead of running the query
        for entityid in entityids.split(','):
            features.append(se.search(index='maplayers', id=entityid)['_source'])
        return JSONResponse(geojson_collection)

    for hit in query.search(**args)['hits']['hits']:
        feature = hit['_source']
        if get_centroids:
            feature['geometry'] = feature['properties']['centroid']
            feature.pop('properties', None)
        elif geom_param is not None:
            properties = feature['properties']
            feature['geometry'] = properties[geom_param]
            properties.pop('extent', None)
            properties.pop(geom_param, None)
        else:
            properties = feature['properties']
            properties.pop('extent', None)
            properties.pop('centroid', None)
        features.append(feature)

    return JSONResponse(geojson_collection)
def get_resource_bounds(node):
    """
    Returns the geo bounds of 'points.point' over the 'resource' documents
    whose doc_type is the primary key of the node's graph, or None when the
    aggregation result has no 'bounds' entry

    """

    bounds_query = Query(se, start=0, limit=0)
    bounds_query.add_query(Bool())
    bounds_query.add_aggregation(GeoBoundsAgg(field='points.point', name='bounds'))

    graph_doc_types = [str(node.graph.pk)]
    response = bounds_query.search(index='resource', doc_type=graph_doc_types)

    return response['aggregations']['bounds'].get('bounds')
def polygon_layers(request, entitytypeid='all'):
    """
    Returns one GeoJSON feature per geometry of every 'maplayers' document

    Features of documents whose 'primaryname' property is "Circulation" are
    placed after all other features.

    Arguments:
    request -- the current request; its 'limit' GET parameter is read

    Keyword Arguments:
    entitytypeid -- used as the search doc_type unless it is 'all'

    Return:
    a JSONResponse wrapping the feature collection

    """

    limit = request.GET.get('limit', settings.MAP_LAYER_FEATURE_LIMIT)

    se = SearchEngineFactory().create()
    query = Query(se, limit=limit)

    args = {'index': 'maplayers'}
    if entitytypeid != 'all':
        args['doc_type'] = entitytypeid

    regular_features = []
    circulation_features = []
    for hit in query.search(**args)['hits']['hits']:
        source = hit['_source']
        for shape in source['geometry']['geometries']:
            feature = {
                "geometry": shape,
                "type": "Feature",
                "id": source['id']
            }
            if source['properties']['primaryname'] == "Circulation":
                circulation_features.append(feature)
            else:
                regular_features.append(feature)

    geojson_collection = {
        "type": "FeatureCollection",
        "features": regular_features + circulation_features
    }
    return JSONResponse(geojson_collection)
def search_terms(request):
    """
    Returns the suggestions for the search box term dropdown

    The 'strings' index is searched for values matching the 'q' GET parameter
    and the hits are aggregated by value. A value that belongs to concepts
    yields one 'concept' entry per concept; any other value yields a single
    'term' entry. Users outside the 'Resource Reviewer' group only get
    non-provisional values.

    Arguments:
    request -- the current request; its 'lang' and 'q' GET parameters and the
        requesting user's groups are read

    Return:
    a JSONResponse wrapping the list of suggestions (empty when the search
    returns nothing)

    """

    lang = request.GET.get('lang', settings.LANGUAGE_CODE)
    se = SearchEngineFactory().create()
    search_string = request.GET.get('q', '').lower()
    query = Query(se, start=0, limit=0)
    user_is_reviewer = request.user.groups.filter(name='Resource Reviewer').exists()

    boolquery = Bool()
    boolquery.should(Match(field='value', query=search_string, type='phrase_prefix', fuzziness='AUTO'))
    boolquery.should(Match(field='value.folded', query=search_string, type='phrase_prefix', fuzziness='AUTO'))
    boolquery.should(Match(field='value.folded', query=search_string, fuzziness='AUTO'))
    if user_is_reviewer is False:
        boolquery.filter(Terms(field='provisional', terms=['false']))
    query.add_query(boolquery)

    base_agg = Aggregation(name='value_agg', type='terms', field='value.raw', size=settings.SEARCH_DROPDOWN_LENGTH, order={"max_score": "desc"})
    nodegroupid_agg = Aggregation(name='nodegroupid', type='terms', field='nodegroupid')
    top_concept_agg = Aggregation(name='top_concept', type='terms', field='top_concept')
    conceptid_agg = Aggregation(name='conceptid', type='terms', field='conceptid')
    max_score_agg = MaxAgg(name='max_score', script='_score')

    top_concept_agg.add_aggregation(conceptid_agg)
    base_agg.add_aggregation(max_score_agg)
    base_agg.add_aggregation(top_concept_agg)
    base_agg.add_aggregation(nodegroupid_agg)
    query.add_aggregation(base_agg)

    # the fallback has to carry the aggregation structure read below; the
    # previous one only had 'hits', so an empty search raised a KeyError
    results = query.search(index='strings') or {'aggregations': {'value_agg': {'buckets': []}}}

    ret = []
    for result in results['aggregations']['value_agg']['buckets']:
        if len(result['top_concept']['buckets']) > 0:
            for top_concept in result['top_concept']['buckets']:
                top_concept_id = top_concept['key']
                top_concept_label = get_preflabel_from_conceptid(top_concept['key'], lang)['value']
                for concept in top_concept['conceptid']['buckets']:
                    ret.append({
                        'type': 'concept',
                        'context': top_concept_id,
                        'context_label': top_concept_label,
                        # position in the list, so every entry gets its own id
                        'id': len(ret),
                        'text': result['key'],
                        'value': concept['key']
                    })
        else:
            ret.append({
                'type': 'term',
                'context': '',
                'context_label': get_resource_model_label(result),
                'id': len(ret),
                'text': result['key'],
                'value': result['key']
            })

    return JSONResponse(ret)
def index_resource_relations(clear_index=True, batch_size=settings.BULK_IMPORT_BATCH_SIZE):
    """
    Indexes all resource to resource relation records

    Keyword Arguments:
    clear_index -- set to True to empty the 'resource_relations' index before
        the reindexing operation
    batch_size -- the number of records to index as a group, the larger the
        number the more memory required

    A status line comparing the number of rows read with the number of indexed
    documents is printed at the end.

    """

    start = datetime.now()
    print("Indexing resource to resource relations")

    cursor = connection.cursor()
    se = SearchEngineFactory().create()
    if clear_index:
        q = Query(se=se)
        q.delete(index='resource_relations')

    # document keys, in the order of the columns selected below
    field_names = ('resourcexid', 'resourceinstanceidfrom', 'notes', 'relationshiptype', 'resourceinstanceidto')

    with se.BulkIndexer(batch_size=batch_size, refresh=True) as resource_relations_indexer:
        sql = """
            SELECT resourcexid, resourceinstanceidfrom, notes, relationshiptype, resourceinstanceidto
            FROM public.resource_x_resource;
        """
        cursor.execute(sql)
        for resource_relation in cursor.fetchall():
            doc = dict(zip(field_names, resource_relation))
            resource_relations_indexer.add(index='resource_relations', doc_type='all', id=doc['resourcexid'], data=doc)

    index_count = se.count(index='resource_relations')
    status = 'Passed' if cursor.rowcount == index_count else 'Failed'
    print("Status: {0}, In Database: {1}, Indexed: {2}, Took: {3} seconds".format(status, cursor.rowcount, index_count, (datetime.now()-start).seconds))
def clear_resources():
    """
    Removes all resource instances from your db and elasticsearch resource index

    The 'term' documents of the 'strings' index and the 'resource' and
    'resource_relations' indexes are emptied, then the resource_instances and
    resource_x_resource tables are truncated. Counts are printed before and
    after each truncation.

    """

    se = SearchEngineFactory().create()
    match_all_query = Query(se)
    match_all_query.delete(index='strings', doc_type='term')
    match_all_query.delete(index='resource')
    match_all_query.delete(index='resource_relations')

    # each message is formatted into a single string so the output is the same
    # whether print is the Python 2 statement or the Python 3 function
    print('deleting {0} resources'.format(Resource.objects.count()))
    cursor = connection.cursor()
    cursor.execute("TRUNCATE public.resource_instances CASCADE;")
    print('{0} resources remaining'.format(Resource.objects.count()))

    print('deleting {0} resource relationships'.format(models.ResourceXResource.objects.count()))
    cursor.execute("TRUNCATE public.resource_x_resource CASCADE;")
    print('{0} resource relationships remaining'.format(models.ResourceXResource.objects.count()))
def time_wheel_config(request):
    """
    Builds the hierarchy of millennium > century > decade date buckets that
    spans the dates found in the 'resource' index

    A first search finds the smallest and largest year in the 'dates' field.
    Both are rounded to a multiple of 1000 and a second search aggregates the
    dates into nested date ranges, which are converted into a d3 hierarchy.

    Arguments:
    request -- the current request (not referenced)

    Return:
    a JSONResponse wrapping the root item of the hierarchy, or an
    HttpResponseNotFound when the date limits can not be determined

    """

    def year_range_agg(name_template, first_year, last_year):
        # date range aggregation on 'dates' covering [first_year, last_year)
        return DateRangeAgg(
            name=name_template % (first_year, last_year),
            field='dates',
            format='y',
            min_date=str(first_year),
            max_date=str(last_year))

    se = SearchEngineFactory().create()
    limits_query = Query(se, limit=0)
    limits_query.add_aggregation(MinAgg(field='dates', format='y'))
    limits_query.add_aggregation(MaxAgg(field='dates', format='y'))
    results = limits_query.search(index='resource')

    if (results is None
            or results['aggregations']['min_dates']['value'] is None
            or results['aggregations']['max_dates']['value'] is None):
        return HttpResponseNotFound(_('Error retrieving the time wheel config'))

    min_date = int(results['aggregations']['min_dates']['value_as_string'])
    max_date = int(results['aggregations']['max_dates']['value_as_string'])

    # round min and max date to the nearest 1000 years
    if min_date < 0:
        min_date = math.ceil(math.fabs(min_date)/1000)*-1000
    else:
        min_date = math.floor(min_date/1000)*1000
    if max_date < 0:
        max_date = math.floor(math.fabs(max_date)/1000)*-1000
    else:
        max_date = math.ceil(max_date/1000)*1000

    query = Query(se, limit=0)
    for millennium in range(int(min_date), int(max_date)+1000, 1000):
        millenium_agg = year_range_agg("Millennium (%s-%s)", millennium, millennium + 1000)
        for century in range(millennium, millennium + 1000, 100):
            century_agg = year_range_agg("Century (%s-%s)", century, century + 100)
            millenium_agg.add_aggregation(century_agg)
            for decade in range(century, century + 100, 10):
                century_agg.add_aggregation(year_range_agg("Decade (%s-%s)", decade, decade + 10))
        query.add_aggregation(millenium_agg)

    root = d3Item(name='root')
    transformESAggToD3Hierarchy({'buckets': [query.search(index='resource')['aggregations']]}, root)
    return JSONResponse(root, indent=4)