def prepare_resource_relations_index(self, create=False):
    """
    Builds the Elasticsearch settings/mappings for the resource_relations
    index used to track related resources.

    create -- when True, also creates the index in Elasticsearch
    Returns the index settings dict.
    """
    # entity ids and the relationship type are stored verbatim so they can
    # be filtered on exactly; notes remain analyzed free text
    properties = {
        'resourcexid': {'type': 'long'},
        'notes': {'type': 'string'},
    }
    for exact_match_field in ('relationshiptype', 'entityid2', 'entityid1'):
        properties[exact_match_field] = {'type': 'string', 'index': 'not_analyzed'}

    index_settings = {'mappings': {'all': {'properties': properties}}}

    if create:
        se = SearchEngineFactory().create()
        # ignore=400 makes re-creation of an existing index a no-op
        se.create_index(index='resource_relations', body=index_settings, ignore=400)

    return index_settings
def resource_manager(request, resourcetypeid='', form_id='default', resourceid=''):
    """
    Main view for creating, editing, and deleting a resource instance.

    GET renders the requested form, POST saves submitted form data and
    reindexes the resource, DELETE removes the resource and all of its
    relationships from both the database and the search indexes.
    """
    # load an existing resource by id, or start a new one of the given type
    if resourceid != '':
        resource = Resource(resourceid)
    elif resourcetypeid != '':
        resource = Resource({'entitytypeid': resourcetypeid})

    if form_id == 'default':
        form_id = resource.form_groups[0]['forms'][0]['id']

    form = resource.get_form(form_id)

    if request.method == 'DELETE':
        resource.delete_index()
        se = SearchEngineFactory().create()
        # remove each relationship document from the index and the database
        # before deleting the resource itself
        realtionships = resource.get_related_resources(return_entities=False)
        for realtionship in realtionships:
            se.delete(index='resource_relations', doc_type='all', id=realtionship.resourcexid)
            realtionship.delete()
        resource.delete()
        return JSONResponse({ 'success': True })

    if request.method == 'POST':
        data = JSONDeserializer().deserialize(request.POST.get('formdata', {}))
        form.update(data, request.FILES)

        with transaction.atomic():
            # drop the stale index entry before saving so the reindex is clean
            if resourceid != '':
                resource.delete_index()
            resource.save(user=request.user)
            resource.index()
            resourceid = resource.entityid

            return redirect('resource_manager', resourcetypeid=resourcetypeid, form_id=form_id, resourceid=resourceid)

    # overall date range used to initialize the time filter widget
    min_max_dates = models.Dates.objects.aggregate(Min('val'), Max('val'))

    if request.method == 'GET':
        if form != None:
            lang = request.GET.get('lang', settings.LANGUAGE_CODE)
            form.load(lang)
            return render(request, 'resource-manager.htm', {
                'form': form,
                'formdata': JSONSerializer().serialize(form.data),
                'form_template': 'views/forms/' + form_id + '.htm',
                'form_id': form_id,
                'resourcetypeid': resourcetypeid,
                'resourceid': resourceid,
                'main_script': 'resource-manager',
                'active_page': 'ResourceManger',
                'resource': resource,
                'resource_name': resource.get_primary_name(),
                'resource_type_name': resource.get_type_name(),
                'form_groups': resource.form_groups,
                'min_date': min_max_dates['val__min'].year if min_max_dates['val__min'] != None else 0,
                # NOTE(review): the condition below tests val__min while the
                # value read is val__max -- looks like it should test
                # val__max; confirm before changing
                'max_date': min_max_dates['val__max'].year if min_max_dates['val__min'] != None else 1,
                'timefilterdata': JSONSerializer().serialize(Concept.get_time_filter_data()),
            })
        else:
            return HttpResponseNotFound('<h1>Arches form not found.</h1>')
def delete_index(self):
    """
    Removes this concept's label documents from the concept_labels index
    and its entries from the term index.
    """
    search_engine = SearchEngineFactory().create()
    # phrase-match on the conceptid so every label of this concept is removed
    label_query = Query(search_engine, start=0, limit=10000)
    label_query.add_query(Match(field='conceptid', query=self.conceptid, type='phrase'))
    label_query.delete(index='concept_labels')
    search_engine.delete_terms(self.id)
def index_resources():
    """
    Deletes any existing indices from elasticsearch related to resources
    and then indexes all resources from the database, printing a per-type
    summary of database counts vs. indexed counts.
    """
    result_summary = {}
    se = SearchEngineFactory().create()

    # clear existing indexes
    for index_type in ['resource_relations', 'entity', 'resource', 'maplayers']:
        se.delete_index(index=index_type)
    # remove only the resource-derived terms (docs with no conceptid option);
    # concept-derived terms are handled by the concept indexer
    se.delete(index='term', body='{"query":{"bool":{"must":[{"constant_score":{"filter":{"missing":{"field":"value.options.conceptid"}}}}],"must_not":[],"should":[]}}}')
    Resource().prepare_term_index(create=True)

    cursor = connection.cursor()
    cursor.execute("""select entitytypeid from data.entity_types where isresource = TRUE""")
    resource_types = cursor.fetchall()
    Resource().prepare_resource_relations_index(create=True)

    # one mapping per resource type in the shared 'entity' index
    for resource_type in resource_types:
        Resource().prepare_search_index(resource_type[0], create=True)

    index_resources_by_type(resource_types, result_summary)

    # refresh so the count below sees everything just indexed
    se.es.indices.refresh(index='entity')
    for resource_type in resource_types:
        result_summary[resource_type[0]]['indexed'] = se.es.count(index="entity", doc_type=resource_type[0])['count']

    print '\nResource Index Results:'
    for k, v in result_summary.iteritems():
        status = 'Passed' if v['database'] == v['indexed'] else 'failed'
        print "Status: {0}, Resource Type: {1}, In Database: {2}, Indexed: {3}".format(status, k, v['database'], v['indexed'])
def get_scheme_id(self):
    """
    Looks up this label's concept scheme in the concept_labels index.
    Returns a Concept for the scheme, or None when the label is not indexed.
    """
    search_engine = SearchEngineFactory().create()
    hit = search_engine.search(index='concept_labels', id=self.id)
    if not hit['found']:
        return None
    # the doc_type of a label document is the id of its scheme
    return Concept(hit['_type'])
def get_related_resources(self, lang='en-US', limit=1000, start=0):
    """
    Returns an object that lists the related resources, the relationship types,
    and a reference to the current resource
    """
    ret = {
        'resource_instance': self,
        'resource_relationships': [],
        'related_resources': []
    }
    se = SearchEngineFactory().create()
    query = Query(se, limit=limit, start=start)
    # a relation matches when this instance appears on either end
    bool_filter = Bool()
    bool_filter.should(Terms(field='resourceinstanceidfrom', terms=self.resourceinstanceid))
    bool_filter.should(Terms(field='resourceinstanceidto', terms=self.resourceinstanceid))
    query.add_query(bool_filter)
    resource_relations = query.search(index='resource_relations', doc_type='all')
    ret['total'] = resource_relations['hits']['total']
    instanceids = set()
    for relation in resource_relations['hits']['hits']:
        # resolve the relationshiptype valueid to a human readable label
        relation['_source']['preflabel'] = get_preflabel_from_valueid(relation['_source']['relationshiptype'], lang)
        ret['resource_relationships'].append(relation['_source'])
        instanceids.add(relation['_source']['resourceinstanceidto'])
        instanceids.add(relation['_source']['resourceinstanceidfrom'])
    if len(instanceids) > 0:
        # drop self so only the other end of each relation is fetched
        instanceids.remove(str(self.resourceinstanceid))

    related_resources = se.search(index='resource', doc_type='_all', id=list(instanceids))
    if related_resources:
        for resource in related_resources['docs']:
            ret['related_resources'].append(resource['_source'])

    return ret
def index_resources_by_type(resource_types, result_summary):
    """
    Collects and indexes all resources of the given types, recording the
    database count per type in result_summary, then mirrors every
    RelatedResource row into the resource_relations index.
    """
    for resource_type in resource_types:
        resources = archesmodels.Entities.objects.filter(entitytypeid = resource_type)
        print "Indexing {0} {1} resources".format(len(resources), resource_type[0])
        result_summary[resource_type[0]] = {'database':len(resources), 'indexed':0}
        errors = []
        for resource in resources:
            try:
                resource = Resource().get(resource.entityid)
                resource.index()
            except Exception as e:
                # NOTE(review): exception instances compare by identity, so
                # this de-duplication almost never suppresses anything --
                # each raise produces a distinct object; confirm intent
                if e not in errors:
                    errors.append(e)
        if len(errors) > 0:
            print errors[0], ':', len(errors)

    se = SearchEngineFactory().create()
    # mirror every database relationship into the resource_relations index
    related_resource_records = archesmodels.RelatedResource.objects.all()
    for related_resource_record in related_resource_records:
        se.index_data(index='resource_relations', doc_type='all', body=model_to_dict(related_resource_record), idfield='resourcexid')

    return result_summary
def get_preflabel_from_valueid(valueid, lang):
    """
    Resolves a value id (e.g. a relationship type) to its concept's
    preferred label in the requested language.

    Returns None implicitly when no document with that id exists in the
    concept_labels index.
    """
    se = SearchEngineFactory().create()
    concept_label = se.search(index='concept_labels', id=valueid)
    if concept_label['found']:
        # two-step lookup: valueid -> owning conceptid -> preferred label
        return get_preflabel_from_conceptid(get_concept_label_from_valueid(valueid)['conceptid'], lang)
def get_scheme_id(self):
    """
    Fetches this concept document from the strings index and returns a
    Concept built from its recorded top concept, or None when not indexed.
    """
    search_engine = SearchEngineFactory().create()
    hit = search_engine.search(index='strings', doc_type='concept', id=self.id)
    if not hit['found']:
        return None
    return Concept(hit['top_concept'])
def prepare_search_index(self, resource_type_id, create=False):
    """
    Creates the Elasticsearch settings and mappings needed to search
    resources of the given type, extending the base class mapping with a
    date_groups sub-mapping.

    resource_type_id -- the entity type id used as the ES doc_type
    create -- when True, creates the 'entity' index (or, if it already
    exists, just adds the mapping for this resource type)
    """
    index_settings = super(Resource, self).prepare_search_index(resource_type_id, create=False)
    index_settings['mappings'][resource_type_id]['properties']['date_groups'] = {
        'properties': {
            'conceptid': {'type': 'string', 'index': 'not_analyzed'}
        }
    }

    if create:
        se = SearchEngineFactory().create()
        try:
            se.create_index(index='entity', body=index_settings)
        except Exception:
            # BUG FIX: was a bare "except:", which also swallowed
            # SystemExit/KeyboardInterrupt. The index presumably already
            # exists, so fall back to adding just this type's mapping.
            index_settings = index_settings['mappings']
            se.create_mapping(index='entity', doc_type=resource_type_id, body=index_settings)
def prepare_resource_relations_index(create=False):
    """
    Builds the Elasticsearch settings/mappings for the resource_relations
    index used to track related resources.

    create -- when True, also creates the index in Elasticsearch
    Returns the index settings dict.
    """
    # every field except the free-text notes is an exact-match keyword
    field_types = {
        'resourcexid': 'keyword',
        'notes': 'text',
        'relationshiptype': 'keyword',
        'resourceinstanceidfrom': 'keyword',
        'resourceinstanceidto': 'keyword',
        'created': 'keyword',
        'modified': 'keyword',
    }
    properties = dict((name, {'type': es_type}) for name, es_type in field_types.items())
    index_settings = {'mappings': {'all': {'properties': properties}}}

    if create:
        se = SearchEngineFactory().create()
        # ignore=400 makes re-creation of an existing index a no-op
        se.create_index(index='resource_relations', body=index_settings, ignore=400)

    return index_settings
def related_resources(request, resourceid):
    """
    GET returns the resources related to resourceid, filtered by the
    caller's allowed types and anonymity; DELETE (edit group only) removes
    a single relationship from the database and the search index.
    """
    ## get allowed resource types based on permissions
    allowedtypes = get_allowed_types(request)

    is_anon = False
    if request.user.username == "anonymous":
        is_anon = True

    if request.method == 'GET':
        lang = request.GET.get('lang', settings.LANGUAGE_CODE)
        start = request.GET.get('start', 0)
        resources = get_related_resources(resourceid, lang, start=start, limit=15, allowedtypes=allowedtypes, is_anon=is_anon)
        return JSONResponse(resources, indent=4)

    if 'edit' in request.user.user_groups and request.method == 'DELETE':
        se = SearchEngineFactory().create()
        data = JSONDeserializer().deserialize(request.body)
        entityid1 = data.get('entityid1')
        entityid2 = data.get('entityid2')
        resourcexid = data.get('resourcexid')
        # NOTE: 'realtionshiptype' (sic) is the key the client actually sends
        realtionshiptype = data.get('realtionshiptype')
        resource = Resource(entityid1)
        resource.delete_resource_relationship(entityid2, realtionshiptype)
        # remove the relationship document from the index as well
        se.delete(index='resource_relations', doc_type='all', id=resourcexid)
        return JSONResponse({ 'success': True })
def get_related_resources(resourceid, lang, limit=1000, start=0):
    """
    Queries the resource_relations index for every relation touching
    resourceid and returns the relations (with resolved preferred labels)
    plus the entity documents of the related resources.
    """
    ret = {
        'resource_relationships': [],
        'related_resources': []
    }
    se = SearchEngineFactory().create()
    query = Query(se, limit=limit, start=start)
    # a relation matches when this resource appears on either side
    query.add_filter(Terms(field='entityid1', terms=resourceid).dsl, operator='or')
    query.add_filter(Terms(field='entityid2', terms=resourceid).dsl, operator='or')
    resource_relations = query.search(index='resource_relations', doc_type='all')
    ret['total'] = resource_relations['hits']['total']
    entityids = set()
    for relation in resource_relations['hits']['hits']:
        relation['_source']['preflabel'] = get_preflabel_from_valueid(relation['_source']['relationshiptype'], lang)
        ret['resource_relationships'].append(relation['_source'])
        entityids.add(relation['_source']['entityid1'])
        entityids.add(relation['_source']['entityid2'])
    if len(entityids) > 0:
        # exclude the resource itself; only fetch the other ends
        entityids.remove(resourceid)

    related_resources = se.search(index='entity', doc_type='_all', id=list(entityids))
    if related_resources:
        for resource in related_resources['docs']:
            ret['related_resources'].append(resource['_source'])

    return ret
def save(self):
    """
    Persists this resource-to-resource relation and mirrors it into the
    resource_relations Elasticsearch index, stamping created/modified.
    """
    # imported locally to avoid a circular import at module load time
    from arches.app.search.search_engine_factory import SearchEngineFactory
    search_engine = SearchEngineFactory().create()
    if not self.created:
        self.created = datetime.datetime.now()
    self.modified = datetime.datetime.now()
    search_engine.index_data(
        index='resource_relations',
        doc_type='all',
        body=model_to_dict(self),
        idfield='resourcexid')
    super(ResourceXResource, self).save()
def get_resource_names(self, nodevalue):
    """
    Returns the set of display names for the resources referenced by
    nodevalue (looked up one by one in the resource index).
    """
    resource_names = set([])
    # BUG FIX: removed an unused `es = Elasticsearch()` instantiation
    # (all lookups go through the factory-created engine) and a leftover
    # debug print of each resourceid
    se = SearchEngineFactory().create()
    id_list = self.get_id_list(nodevalue)
    for resourceid in id_list:
        resource_document = se.search(index='resource', doc_type='_all', id=resourceid)
        resource_names.add(resource_document['_source']['displayname'])
    return resource_names
def get_related_resources(resourceid, lang, limit=1000, start=0, allowedtypes=[], is_anon=False):
    """
    Queries the resource_relations index for relations touching resourceid,
    then filters out related resources the caller may not see (types not in
    allowedtypes, and protected resources for anonymous users) along with
    the relationships that point at them.

    NOTE(review): allowedtypes uses a mutable default argument; safe only
    as long as callers never mutate it -- confirm.
    """
    ret = {
        'resource_relationships': [],
        'related_resources': []
    }
    se = SearchEngineFactory().create()
    query = Query(se, limit=limit, start=start)
    query.add_filter(Terms(field='entityid1', terms=resourceid).dsl, operator='or')
    query.add_filter(Terms(field='entityid2', terms=resourceid).dsl, operator='or')
    resource_relations = query.search(index='resource_relations', doc_type="all")
    entityids = set()
    for relation in resource_relations['hits']['hits']:
        relation['_source']['preflabel'] = get_preflabel_from_valueid(relation['_source']['relationshiptype'], lang)
        ret['resource_relationships'].append(relation['_source'])
        entityids.add(relation['_source']['entityid1'])
        entityids.add(relation['_source']['entityid2'])
    if len(entityids) > 0:
        entityids.remove(resourceid)

    # can't figure why passing allowed types to doc_type param doesn't work,
    # so filter is carried out later
    related_resources = se.search(index='entity', doc_type='_all', id=list(entityids))

    filtered_ids = []
    if related_resources:
        for resource in related_resources['docs']:
            if not resource['_type'] in allowedtypes:
                filtered_ids.append(resource['_source']['entityid'])
                continue
            if is_anon:
                # filter out protected resources if user is anonymous
                # (this is basically a subset of the get_protected_entityids below
                # they should be combined probably)
                from search import get_protection_conceptids
                protect_id = get_protection_conceptids(settings.PROTECTION_LEVEL_NODE)
                conceptids = [d['conceptid'] for d in resource['_source']['domains']]
                if protect_id in conceptids:
                    filtered_ids.append(resource['_source']['entityid'])
                    continue
            ret['related_resources'].append(resource['_source'])

    if len(filtered_ids) > 0:
        # remove all relationships in ret that match a filtered id (this lc is yuge but I think concise)
        filtered_relationships = [rel for rel in ret['resource_relationships'] if not rel['entityid1'] in filtered_ids and not rel['entityid2'] in filtered_ids]
        # update ret values
        ret['resource_relationships'] = filtered_relationships

    # total reflects the post-filter relationship count
    ret['total'] = len(ret['resource_relationships'])

    return ret
def index(self, scheme=None):
    """
    Indexes this value into the 'strings' index under doc_type 'concept'.
    Only values whose category is 'label' are indexed.

    scheme -- the concept scheme this label belongs to; when omitted it is
    looked up via get_scheme_id()
    Raises Exception when no scheme can be determined for the label.
    """
    if self.category == 'label':
        se = SearchEngineFactory().create()
        data = JSONSerializer().serializeToPython(self)
        if scheme == None:
            scheme = self.get_scheme_id()
        if scheme == None:
            raise Exception('Index of label failed. Index type (scheme id) could not be derived from the label.')
        # record the scheme so searches can be scoped to a concept tree
        data['top_concept'] = scheme.id
        se.index_data('strings', 'concept', data, 'id')
def map_layers(request, entitytypeid='all', get_centroids=False):
    """
    Returns a GeoJSON FeatureCollection of map layer features, optionally
    restricted to one entity type or to specific entity ids, and optionally
    reduced to centroids with heavy properties stripped.
    """
    data = []
    geom_param = request.GET.get('geom', None)
    bbox = request.GET.get('bbox', '')
    limit = request.GET.get('limit', settings.MAP_LAYER_FEATURE_LIMIT)
    entityids = request.GET.get('entityid', '')
    geojson_collection = {
        "type": "FeatureCollection",
        "features": []
    }
    se = SearchEngineFactory().create()
    query = Query(se, limit=limit)
    args = { 'index': 'maplayers' }
    if entitytypeid != 'all':
        args['doc_type'] = entitytypeid
    if entityids != '':
        # explicit id lookup bypasses the search query entirely
        for entityid in entityids.split(','):
            geojson_collection['features'].append(se.search(index='maplayers', id=entityid)['_source'])
        return JSONResponse(geojson_collection)

    data = query.search(**args)
    if not data:
        return JSONResponse({})

    for item in data['hits']['hits']:
        # If the user is not authenticated, show only published features.
        # (Translated from Slovenian; the original note adds: this probably
        # needs more work -- maybe users should only see their own???)
        # NOTE(review): the double negation below is equivalent to
        # username == 'anonymous' -- confirm that is the intent
        if (not request.user.username != 'anonymous'):
            if (item['_source']['properties']['ewstatus'] != settings.PUBLISHED_LABEL):
                continue
        if get_centroids:
            # replace the full geometry with its centroid and strip the
            # heavy per-feature properties to keep the payload small
            item['_source']['geometry'] = item['_source']['properties']['centroid']
            #item['_source'].pop('properties', None)
            item['_source']['properties'].pop('extent', None)
            item['_source']['properties'].pop('elements', None)
            item['_source']['properties'].pop('entitytypeid', None)
            item['_source']['properties'].pop('constructions', None)
            item['_source']['properties'].pop('centroid', None)
            item['_source']['properties'].pop('ewstatus', None)
            item['_source']['properties'].pop('address', None)
            item['_source']['properties'].pop('designations', None)
            item['_source']['properties'].pop('primaryname', None)
            item['_source']['properties'].pop('resource_type', None)
        elif geom_param != None:
            # serve an alternate geometry property as the feature geometry
            item['_source']['geometry'] = item['_source']['properties'][geom_param]
            item['_source']['properties'].pop('extent', None)
            item['_source']['properties'].pop(geom_param, None)
        else:
            item['_source']['properties'].pop('extent', None)
            item['_source']['properties'].pop('centroid', None)
        geojson_collection['features'].append(item['_source'])

    return JSONResponse(geojson_collection)
def map_layers(request, entitytypeid='all', get_centroids=False):
    """
    Returns a GeoJSON FeatureCollection of map layer features, optionally
    restricted to one entity type or to specific entity ids; features of
    protected resources are hidden from anonymous users.
    """
    data = []
    geom_param = request.GET.get('geom', None)
    bbox = request.GET.get('bbox', '')
    limit = request.GET.get('limit', settings.MAP_LAYER_FEATURE_LIMIT)
    entityids = request.GET.get('entityid', '')
    geojson_collection = {
        "type": "FeatureCollection",
        "features": []
    }
    se = SearchEngineFactory().create()
    query = Query(se, limit=limit)
    args = { 'index': 'maplayers' }
    if entitytypeid != 'all':
        args['doc_type'] = entitytypeid
    if entityids != '':
        # explicit id lookup bypasses the search query entirely
        for entityid in entityids.split(','):
            geojson_collection['features'].append(se.search(index='maplayers', id=entityid)['_source'])
        return JSONResponse(geojson_collection)

    data = query.search(**args)

    # if anonymous user, get list of protected entity ids to be excluded from map
    protected = []
    if request.user.username == 'anonymous':
        protected = get_protected_entityids()

    # BUG FIX: removed three leftover debug print statements (the protected
    # id list and a per-feature dump of every hidden document)
    for item in data['hits']['hits']:
        if item['_id'] in protected:
            continue
        if get_centroids:
            # centroids only: swap geometry and drop all properties
            item['_source']['geometry'] = item['_source']['properties']['centroid']
            item['_source'].pop('properties', None)
        elif geom_param != None:
            # serve an alternate geometry property as the feature geometry
            item['_source']['geometry'] = item['_source']['properties'][geom_param]
            item['_source']['properties'].pop('extent', None)
            item['_source']['properties'].pop(geom_param, None)
        else:
            item['_source']['properties'].pop('extent', None)
            item['_source']['properties'].pop('centroid', None)
        geojson_collection['features'].append(item['_source'])

    return JSONResponse(geojson_collection)
def setUpClass(cls):
    """
    Resets the concept_labels and term search indexes and loads the test
    resource graph fixture before any test in this class runs.
    """
    search_engine = SearchEngineFactory().create()
    # drop both indexes first, then recreate them empty
    for index_name in ("concept_labels", "term"):
        search_engine.delete_index(index=index_name)
    for index_name in ("concept_labels", "term"):
        search_engine.create_index(index=index_name)
    management.call_command(
        "packages",
        operation="import_json",
        source="tests/fixtures/resource_graphs/archesv4_resource.json",
    )
def get(self, request, resourceid=None):
    """
    Returns the display descriptors for a resource instance pulled from the
    search index, or 404 when no resourceid is supplied.
    """
    if resourceid is None:
        return HttpResponseNotFound()
    se = SearchEngineFactory().create()
    source = se.search(index='resource', doc_type='_all', id=resourceid)['_source']
    resource = Resource.objects.get(pk=resourceid)
    descriptors = {
        'graphid': source['graph_id'],
        'graph_name': resource.graph.name,
    }
    # remaining descriptors come straight from the indexed document
    for field in ('displaydescription', 'map_popup', 'displayname', 'geometries'):
        descriptors[field] = source[field]
    return JSONResponse(descriptors)
def index(self): """ Indexes all the nessesary items values of a resource to support search """ se = SearchEngineFactory().create() datatype_factory = DataTypeFactory() node_datatypes = {str(nodeid): datatype for nodeid, datatype in models.Node.objects.values_list('nodeid', 'datatype')} document, terms = self.get_documents_to_index(datatype_factory=datatype_factory, node_datatypes=node_datatypes) se.index_data('resource', self.graph_id, JSONSerializer().serializeToPython(document), id=self.pk) for term in terms: se.index_data('strings', 'term', term['_source'], id=term['_id'])
def index(self): """ Indexes all the nessesary items values of a resource to support search """ if unicode(self.graph_id) != unicode(settings.SYSTEM_SETTINGS_RESOURCE_MODEL_ID): se = SearchEngineFactory().create() datatype_factory = DataTypeFactory() node_datatypes = {str(nodeid): datatype for nodeid, datatype in models.Node.objects.values_list('nodeid', 'datatype')} document, terms = self.get_documents_to_index(datatype_factory=datatype_factory, node_datatypes=node_datatypes) document['root_ontology_class'] = self.get_root_ontology() se.index_data('resource', self.graph_id, JSONSerializer().serializeToPython(document), id=self.pk) for term in terms: se.index_data('strings', 'term', term['_source'], id=term['_id'])
def IndexConceptFixer(source): """ Simple utility to delete the ES index of a given list of conceptids """ with open(source, 'rb') as csvfile: reader = csv.DictReader(csvfile, delimiter= '|') se = SearchEngineFactory().create() for row in reader: try: conceptvalues = Values.objects.filter(conceptid = row['conceptid']) for conceptvalue in conceptvalues: se.delete_terms(conceptvalue.valueid) except: print "Concept Value %s does not exist" % row['conceptid']
def index_concepts_for_search(self):
    """
    Walks the whole concept tree, computes nested-set (left/right) bounds
    for every concept, and indexes the flattened result into the 'concept'
    index under doc_type 'all'.
    """
    # see http://sqlblog.com/blogs/adam_machanic/archive/2006/07/12/swinging-from-tree-to-tree-using-ctes-part-1-adjacency-to-nested-sets.aspx
    # Value of Lft for the root node is 1
    # Value of Rgt for the root node is 2 * (Number of nodes)
    # Value of Lft for any node is ((Number of nodes visited) * 2) - (Level of current node)
    # Value of Rgt for any node is (Lft value) + ((Number of subnodes) * 2) + 1
    # the concept tree can be deep; the default recursion limit is too low
    sys.setrecursionlimit(3000)
    se = SearchEngineFactory().create()
    se.create_mapping('concept', 'all', 'conceptid', 'string', 'not_analyzed')
    se.create_mapping('concept', 'all', 'labelid', 'string', 'not_analyzed')

    def _findNarrowerConcept(conceptid, ret=None, limit=200000, level=1):
        # recursively visits narrower concepts, accumulating each node's
        # labels and nested-set left/right bounds into ret
        returnobj = {'subnodes': 0}
        if ret == None: # the root node
            labels = archesmodels.Values.objects.filter(conceptid = conceptid)
            ret = {}
            nodesvisited = len(ret) + 1
            ret[conceptid] = {'labels': [], 'left': (nodesvisited*2)-level, 'right': 0}
            for label in labels:
                ret[conceptid]['labels'].append({'labelid': label.pk, 'label': label.value})
            level = level + 1
        conceptrealations = archesmodels.ConceptRelations.objects.filter(conceptidfrom = conceptid)
        for relation in conceptrealations:
            nodesvisited = len(ret) + 1
            labels = archesmodels.Values.objects.filter(conceptid = relation.conceptidto)
            ret[relation.conceptidto_id] = {'labels': [], 'left': (nodesvisited*2)-level, 'right': 0}
            for label in labels:
                ret[relation.conceptidto_id]['labels'].append({'labelid': label.pk, 'label': label.value})
            returnobj = _findNarrowerConcept(relation.conceptidto_id, ret=ret, level=level+1)
        subnodes = returnobj['subnodes']
        if subnodes == 0: # meaning we're at a leaf node
            ret[conceptid]['right'] = ret[conceptid]['left'] + 1
        else:
            ret[conceptid]['right'] = subnodes + 1
        return {'all_concepts': ret, 'subnodes': ret[conceptid]['right']}

    # hard-coded id of the root of the concept tree
    concepts = _findNarrowerConcept('00000000-0000-0000-0000-000000000003')
    all_concepts = []
    # flatten the id-keyed dict into the list shape the indexer expects
    for key, concept in concepts['all_concepts'].iteritems():
        all_concepts.append({'conceptid': key, 'labels': concept['labels'], 'left': concept['left'], 'right': concept['right']})

    self.index(all_concepts, 'concept', 'all', 'conceptid')
def related_resources(request, resourceid):
    """
    GET returns the resources related to resourceid; DELETE (edit group
    only) removes a single relationship from both the database and the
    resource_relations search index.
    """
    if request.method == 'GET':
        lang = request.GET.get('lang', settings.LANGUAGE_CODE)
        start = request.GET.get('start', 0)
        return JSONResponse(get_related_resources(resourceid, lang, start=start, limit=15), indent=4)

    if 'edit' in request.user.user_groups and request.method == 'DELETE':
        se = SearchEngineFactory().create()
        data = JSONDeserializer().deserialize(request.body)
        entityid1 = data.get('entityid1')
        entityid2 = data.get('entityid2')
        resourcexid = data.get('resourcexid')
        # NOTE: 'realtionshiptype' (sic) is the key the client actually sends
        realtionshiptype = data.get('realtionshiptype')
        resource = Resource(entityid1)
        resource.delete_resource_relationship(entityid2, realtionshiptype)
        # remove the relationship document from the index as well
        se.delete(index='resource_relations', doc_type='all', id=resourcexid)
        return JSONResponse({ 'success': True })
def index_concepts():
    """
    Collects all concepts and indexes both concepts and concept_labels,
    then verifies each concept landed in the index and prints a summary.
    """
    se = SearchEngineFactory().create()
    se.delete_index(index='concept_labels')
    # remove only the concept-derived terms (docs that DO carry a conceptid
    # option); resource-derived terms are left alone
    se.delete(index='term', body='{"query":{"bool":{"must_not":[{"constant_score":{"filter":{"missing":{"field":"value.options.conceptid"}}}}],"must":[],"should":[]}}}')
    Resource().prepare_term_index(create=True)

    print 'indexing concepts'
    start = datetime.now()

    cursor = connection.cursor()
    cursor.execute("""select conceptid from concepts.concepts""")
    conceptids = cursor.fetchall()
    for c in conceptids:
        # core/system concepts are never indexed
        if c[0] not in CORE_CONCEPTS:
            concept = Concept().get(id=c[0], include_subconcepts=True, include_parentconcepts=False, include=['label'])
            concept.index()

    end = datetime.now()
    duration = end - start
    print 'indexing concepts required', duration.seconds, 'seconds'

    # verification pass: check every non-core concept is searchable
    cursor = connection.cursor()
    sql = """
        select conceptid, conceptlabel from concepts.vw_concepts where conceptid not in ('%s')
    """ % ("','".join(CORE_CONCEPTS))
    cursor.execute(sql)
    concepts = cursor.fetchall()
    concept_index_results = {'count':len(concepts), 'passed':0, 'failed':0}
    for conceptid, conceptvalue in concepts:
        result = get_indexed_concepts(se, conceptid, conceptvalue)
        if result != 'passed':
            concept_index_results['failed'] += 1
        else:
            concept_index_results['passed'] += 1

    status = 'Passed' if concept_index_results['failed'] == 0 else 'Failed'
    print '\nConcept Index Results:'
    print "Status: {0}, In Database: {1}, Indexed: {2}".format(status, concept_index_results['count'], concept_index_results['passed'])
def get_protected_entityids():
    '''returns list of entity ids for protected resources'''
    from search import get_protection_conceptids
    protect_id = get_protection_conceptids(settings.PROTECTION_LEVEL_NODE)
    filtered_ids = []
    se = SearchEngineFactory().create()
    # for some reason doc_type must be specified with INFORMATION RESOURCE in
    # order for that type to be queried. right now this is ok, because it's
    # the only type with protection levels, but this is very strange.
    hits = se.search(index='entity', doc_type="INFORMATION_RESOURCE.E73")['hits']['hits']
    for hit in hits:
        domain_conceptids = [domain['conceptid'] for domain in hit['_source']['domains']]
        if protect_id in domain_conceptids:
            filtered_ids.append(hit['_source']['entityid'])
    return filtered_ids
def bulk_save(resources):
    """
    Saves and indexes a list of resources

    Arguments:
    resources -- a list of resource models
    """
    se = SearchEngineFactory().create()
    datatype_factory = DataTypeFactory()
    # nodeid -> datatype lookup built once for the whole batch
    node_datatypes = {str(nodeid): datatype for nodeid, datatype in models.Node.objects.values_list('nodeid', 'datatype')}
    tiles = []
    documents = []
    term_list = []

    # flatten out the nested tiles into a single array
    for resource in resources:
        for parent_tile in resource.tiles:
            for child_tile in parent_tile.tiles.itervalues():
                if len(child_tile) > 0:
                    resource.tiles.extend(child_tile)
            # children have been hoisted onto the resource; clear the nesting
            parent_tile.tiles = {}

        tiles.extend(resource.tiles)

    # need to save the models first before getting the documents for index
    Resource.objects.bulk_create(resources)
    TileModel.objects.bulk_create(tiles)

    for resource in resources:
        resource.save_edit(edit_type='create')
        # fetchTiles=False: tiles are already attached in memory above
        document, terms = resource.get_documents_to_index(fetchTiles=False, datatype_factory=datatype_factory, node_datatypes=node_datatypes)
        document['root_ontology_class'] = resource.get_root_ontology()
        documents.append(se.create_bulk_item(index='resource', doc_type=document['graph_id'], id=document['resourceinstanceid'], data=document))
        for term in terms:
            term_list.append(se.create_bulk_item(index='strings', doc_type='term', id=term['_id'], data=term['_source']))

    for tile in tiles:
        tile.save_edit(edit_type='tile create', new_value=tile.data)

    # bulk index the resources, tiles and terms
    se.bulk_index(documents)
    se.bulk_index(term_list)
def add_resource_relation(entityid1, entityid2, relationship_type_string):
    """
    Creates a RelatedResource row linking the two entities, resolving the
    relationship type by (case-insensitive) value match, and mirrors the
    relation into the resource_relations index. Failures are logged, not
    raised.
    """
    search_engine = SearchEngineFactory().create()
    try:
        logging.warning("finding relationship: %s", relationship_type_string)
        # resolve the human-readable type string to its Values row
        value = models.Values.objects.get(value__icontains=relationship_type_string)
        relationship = models.RelatedResource(
            entityid1=entityid1,
            entityid2=entityid2,
            relationshiptype=value.pk)
        relationship.save()
        search_engine.index_data(
            index='resource_relations',
            doc_type='all',
            body=model_to_dict(relationship),
            idfield='resourcexid')
        logging.warning("Added relationship")
    except Exception as e:
        # best-effort: relation creation failures are logged and swallowed
        logging.warning("Unable to create relation %s to %s. %s", entityid1, entityid2, e)
def delete(self, *args, **kwargs):
    """
    Deletes this tile, its child tiles, and any term documents derived
    from it, then reindexes the parent resource. Ordering matters here:
    children first, then index cleanup, then the edit-log entry, then the
    row itself.
    """
    se = SearchEngineFactory().create()
    request = kwargs.pop('request', None)
    # delete child tiles first so their own cleanup runs
    for tiles in self.tiles.itervalues():
        for tile in tiles:
            tile.delete(*args, request=request, **kwargs)
    # remove any term documents generated from this tile
    query = Query(se)
    bool_query = Bool()
    bool_query.filter(Terms(field='tileid', terms=[self.tileid]))
    query.add_query(bool_query)
    results = query.search(index='strings', doc_type='term')['hits']['hits']
    for result in results:
        se.delete(index='strings', doc_type='term', id=result['_id'])
    self.__preDelete(request)
    # record the edit before the row disappears
    self.save_edit(user=request.user, edit_type='tile delete', old_value=self.data)
    super(Tile, self).delete(*args, **kwargs)
    # reindex so search no longer reflects the deleted tile
    resource = Resource.objects.get(resourceinstanceid=self.resourceinstance.resourceinstanceid)
    resource.index()
def get(self, request, resourceid=None):
    """
    Returns the display descriptors for a resource instance, or 404 when
    the resource is missing or is the system settings resource.
    """
    if Resource.objects.filter(pk=resourceid).exclude(pk=settings.SYSTEM_SETTINGS_RESOURCE_ID).exists():
        try:
            resource = Resource.objects.get(pk=resourceid)
            search_engine = SearchEngineFactory().create()
            source = search_engine.search(index="resources", id=resourceid)["_source"]
            descriptors = {
                "graphid": source["graph_id"],
                "graph_name": resource.graph.name,
            }
            # remaining descriptors come straight from the indexed document
            for field in ("displaydescription", "map_popup", "displayname", "geometries"):
                descriptors[field] = source[field]
            return JSONResponse(descriptors)
        except Exception:
            # fall through to 404 on any lookup failure, but keep a record
            logger.exception(_("Failed to fetch resource instance descriptors"))
    return HttpResponseNotFound()
def build_search_terms_dsl(request):
    """
    Builds the term-autocomplete query for the search box from the 'q'
    request parameter.
    """
    se = SearchEngineFactory().create()
    searchString = request.GET.get('q', '')
    lowered = searchString.lower()
    query = Query(se, start=0, limit=settings.SEARCH_DROPDOWN_LENGTH)
    boolquery = Bool()
    # match the raw term and its ascii-folded variant, as prefix phrases
    # and as a plain fuzzy match
    boolquery.should(Match(field='term', query=lowered, type='phrase_prefix', fuzziness='AUTO'))
    boolquery.should(Match(field='term.folded', query=lowered, type='phrase_prefix', fuzziness='AUTO'))
    boolquery.should(Match(field='term.folded', query=lowered, fuzziness='AUTO'))
    query.add_query(boolquery)
    return query
def get_related_resources(self, lang='en-US', limit=1000, start=0):
    """
    Returns an object that lists the related resources, the relationship types,
    and a reference to the current resource
    """
    ret = {
        'resource_instance': self,
        'resource_relationships': [],
        'related_resources': []
    }
    se = SearchEngineFactory().create()
    query = Query(se, limit=limit, start=start)
    # a relation matches when this instance appears on either end
    bool_filter = Bool()
    bool_filter.should(
        Terms(field='resourceinstanceidfrom', terms=self.resourceinstanceid))
    bool_filter.should(
        Terms(field='resourceinstanceidto', terms=self.resourceinstanceid))
    query.add_query(bool_filter)
    resource_relations = query.search(index='resource_relations', doc_type='all')
    ret['total'] = resource_relations['hits']['total']
    instanceids = set()
    for relation in resource_relations['hits']['hits']:
        # resolve the relationshiptype valueid to a human readable label
        relation['_source']['preflabel'] = get_preflabel_from_valueid(
            relation['_source']['relationshiptype'], lang)
        ret['resource_relationships'].append(relation['_source'])
        instanceids.add(relation['_source']['resourceinstanceidto'])
        instanceids.add(relation['_source']['resourceinstanceidfrom'])
    if len(instanceids) > 0:
        # drop self so only the other end of each relation is fetched
        instanceids.remove(str(self.resourceinstanceid))

    related_resources = se.search(index='resource', doc_type='_all', id=list(instanceids))
    if related_resources:
        for resource in related_resources['docs']:
            ret['related_resources'].append(resource['_source'])

    return ret
def prepare_resource_relations_index(create=False):
    """
    Builds the Elasticsearch settings/mappings for the resource_relations
    index used to track related resources.

    create -- when True, also creates the index in Elasticsearch
    Returns the index settings dict.
    """
    # everything except the free-text notes is stored un-analyzed so the
    # ids and relationship type can be filtered on exactly
    properties = {'notes': {'type': 'string'}}
    for exact_match_field in ('resourcexid', 'relationshiptype',
                              'resourceinstanceidfrom', 'resourceinstanceidto'):
        properties[exact_match_field] = {'type': 'string', 'index': 'not_analyzed'}

    index_settings = {'mappings': {'all': {'properties': properties}}}

    if create:
        se = SearchEngineFactory().create()
        # ignore=400 makes re-creation of an existing index a no-op
        se.create_index(index='resource_relations', body=index_settings, ignore=400)

    return index_settings
def delete(self, *args, **kwargs):
    """
    Deletes this tile and its child tiles. Reviewers (and owners of the
    provisional edit) delete for real -- removing term documents, logging
    the edit, running datatype post-delete hooks, and reindexing the
    resource; other users only record a provisional "delete" edit.
    """
    se = SearchEngineFactory().create()
    request = kwargs.pop("request", None)
    provisional_edit_log_details = kwargs.pop("provisional_edit_log_details", None)
    # delete child tiles first so their own cleanup runs
    for tile in self.tiles:
        tile.delete(*args, request=request, **kwargs)
    try:
        user = request.user
        user_is_reviewer = user_is_resource_reviewer(user)
    except AttributeError:  # no user (e.g. system/CLI call): treat as reviewer
        user = None
        user_is_reviewer = True

    if user_is_reviewer is True or self.user_owns_provisional(user):
        # remove any term documents generated from this tile
        query = Query(se)
        bool_query = Bool()
        bool_query.filter(Terms(field="tileid", terms=[self.tileid]))
        query.add_query(bool_query)
        results = query.search(index=TERMS_INDEX)["hits"]["hits"]
        for result in results:
            se.delete(index=TERMS_INDEX, id=result["_id"])
        self.__preDelete(request)
        # record the edit before the row disappears
        self.save_edit(
            user=request.user,
            edit_type="tile delete",
            old_value=self.data,
            provisional_edit_log_details=provisional_edit_log_details
        )
        try:
            super(Tile, self).delete(*args, **kwargs)
            for nodeid, value in self.data.items():
                node = models.Node.objects.get(nodeid=nodeid)
                datatype = self.datatype_factory.get_instance(node.datatype)
                datatype.post_tile_delete(self, nodeid)
            resource = Resource.objects.get(resourceinstanceid=self.resourceinstance.resourceinstanceid)
            resource.index()
        except IntegrityError as err:
            # BUG FIX: was a bare "logger.error" attribute reference (never
            # called), which silently swallowed the error without logging
            logger.error(err)
    else:
        # non-reviewer without ownership: record a provisional delete only
        self.apply_provisional_edit(user, data={}, action="delete")
        super(Tile, self).save(*args, **kwargs)
def prepare_resource_relations_index(create=False):
    """
    Builds the Elasticsearch settings/mappings for the resource-to-resource
    relations index (ES 6/7-style single ``_doc`` mapping) and, when requested,
    creates the index.

    Keyword Arguments:
    create -- when True, create the 'resource_relations' index in Elasticsearch

    Returns the mapping definition as a dict.
    """
    # every field except the free-text notes is an exact-match keyword
    keyword_fields = (
        "resourcexid",
        "relationshiptype",
        "resourceinstanceidfrom",
        "resourceinstanceidto",
        "created",
        "modified",
    )
    properties = {name: {"type": "keyword"} for name in keyword_fields}
    properties["notes"] = {"type": "text"}

    index_settings = {"mappings": {"_doc": {"properties": properties}}}

    if create:
        SearchEngineFactory().create().create_index(index="resource_relations", body=index_settings)

    return index_settings
def get(self, request, resourceid=None):
    """
    Returns summary display data for a single resource, pulled from its
    indexed Elasticsearch document plus the resource's graph name.
    Responds 404 when no resourceid is supplied.
    """
    if resourceid is None:
        return HttpResponseNotFound()

    se = SearchEngineFactory().create()
    source = se.search(index='resources', id=resourceid)['_source']
    resource = Resource.objects.get(pk=resourceid)

    payload = {
        'graphid': source['graph_id'],
        'graph_name': resource.graph.name,
        'displaydescription': source['displaydescription'],
        'map_popup': source['map_popup'],
        'displayname': source['displayname'],
        'geometries': source['geometries'],
    }
    return JSONResponse(payload)
def related_resources(request, resourceid):
    """
    GET: returns the first 15 related resources for the given resource.
    DELETE (users in the 'edit' group only): removes one resource
    relationship and its search-index document.
    """
    if request.method == 'GET':
        lang = request.GET.get('lang', settings.LANGUAGE_CODE)
        start = request.GET.get('start', 0)
        return JSONResponse(get_related_resources(resourceid, lang, start=start, limit=15), indent=4)

    if 'edit' in request.user.user_groups and request.method == 'DELETE':
        se = SearchEngineFactory().create()
        payload = JSONDeserializer().deserialize(request.body)
        from_entity_id = payload.get('entityid1')
        to_entity_id = payload.get('entityid2')
        relation_pk = payload.get('resourcexid')
        # NOTE: the misspelled 'realtionshiptype' key is part of the client
        # payload contract and must not be "fixed" here.
        relation_type = payload.get('realtionshiptype')
        resource = Resource(from_entity_id)
        resource.delete_resource_relationship(to_entity_id, relation_type)
        se.delete(index='resource_relations', doc_type='all', id=relation_pk)
        return JSONResponse({'success': True})
def index(self):
    """
    Indexes all the necessary values of a resource to support search.
    The system-settings resource model is never indexed.
    """
    # skip the singleton system-settings resource (Python 2 ``unicode`` kept)
    if unicode(self.graph_id) == unicode(settings.SYSTEM_SETTINGS_RESOURCE_MODEL_ID):
        return

    se = SearchEngineFactory().create()
    datatype_factory = DataTypeFactory()
    # nodeid -> datatype lookup passed to the document builder
    node_datatypes = dict(
        (str(nodeid), datatype)
        for nodeid, datatype in models.Node.objects.values_list('nodeid', 'datatype')
    )

    document, terms = self.get_documents_to_index(
        datatype_factory=datatype_factory, node_datatypes=node_datatypes)
    document['root_ontology_class'] = self.get_root_ontology()

    se.index_data('resource', self.graph_id,
                  JSONSerializer().serializeToPython(document), id=self.pk)
    for term in terms:
        se.index_data('strings', 'term', term['_source'], id=term['_id'])
def index(self, scheme=None):
    """
    Indexes a concept label into the 'concept_labels' index and, for ordinary
    schemes, into the term index. Non-label values are ignored.

    Keyword Arguments:
    scheme -- the concept scheme to index under; derived from the label itself
              when not supplied

    Raises Exception when no scheme can be derived.
    """
    if self.category != 'label':
        return

    se = SearchEngineFactory().create()
    data = JSONSerializer().serializeToPython(self)
    if scheme is None:
        scheme = self.get_scheme_id()
    if scheme is None:
        raise Exception('Index of label failed. Index type (scheme id) could not be derived from the label.')

    se.create_mapping('concept_labels', scheme.id, fieldname='conceptid',
                      fieldtype='string', fieldindex='not_analyzed')
    se.index_data('concept_labels', scheme.id, data, 'id')

    # don't create terms for entity type concepts
    entity_type_schemes = ('00000000-0000-0000-0000-000000000003',
                           '00000000-0000-0000-0000-000000000004')
    if scheme.id not in entity_type_schemes:
        se.index_term(self.value, self.id, scheme.id, {'conceptid': self.conceptid})
def index(self):
    """
    Indexes all the necessary values of a resource to support search,
    including any configured custom Elasticsearch indexes. The
    system-settings resource model is never indexed.
    """
    if str(self.graph_id) == str(settings.SYSTEM_SETTINGS_RESOURCE_MODEL_ID):
        return

    se = SearchEngineFactory().create()
    datatype_factory = DataTypeFactory()
    node_datatypes = {
        str(nodeid): datatype
        for nodeid, datatype in models.Node.objects.values_list("nodeid", "datatype")
    }

    document, terms = self.get_documents_to_index(
        datatype_factory=datatype_factory, node_datatypes=node_datatypes)
    document["root_ontology_class"] = self.get_root_ontology()

    se.index_data(index="resources",
                  body=JSONSerializer().serializeToPython(document),
                  id=self.pk)
    for term in terms:
        se.index_data("terms", body=term["_source"], id=term["_id"])

    for custom_index in settings.ELASTICSEARCH_CUSTOM_INDEXES:
        es_index = import_class_from_string(custom_index["module"])(custom_index["name"])
        # NOTE(review): ``document`` is deliberately rebound here (as in the
        # original), so with more than one custom index each subsequent index
        # receives the previous custom document's "tiles" — confirm intended.
        document, doc_id = es_index.get_documents_to_index(self, document["tiles"])
        es_index.index_document(document=document, id=doc_id)
def prepare_search_index(self, resource_type_id, create=False):
    """
    Creates the settings and mappings in Elasticsearch to support resource
    search, extending the base mapping with a 'date_groups' property.

    Arguments:
    resource_type_id -- the entity type id the mapping is registered under

    Keyword Arguments:
    create -- when True, create the 'entity' index (or, if it already exists,
              just register the per-type mapping)

    Returns the (extended) mapping definition as a dict.
    """
    index_settings = super(Resource, self).prepare_search_index(resource_type_id, create=False)
    # exact-match concept ids for date-group facets
    index_settings['mappings'][resource_type_id]['properties']['date_groups'] = {
        'properties': {
            'conceptid': {'type': 'string', 'index': 'not_analyzed'}
        }
    }

    if create:
        se = SearchEngineFactory().create()
        try:
            se.create_index(index='entity', body=index_settings)
        except Exception:
            # BUG FIX: was a bare ``except:``, which also swallowed
            # SystemExit/KeyboardInterrupt. When the index already exists,
            # fall back to registering just the per-type mapping.
            index_settings = index_settings['mappings']
            se.create_mapping(index='entity', doc_type=resource_type_id, body=index_settings)

    # return the settings for callers/parity with sibling prepare_* helpers
    return index_settings
def index_resources():
    """
    Deletes any existing indicies from elasticsearch related to resources
    and then indexes all resources from the database
    """
    result_summary = {}
    se = SearchEngineFactory().create()

    # clear existing indexes
    for index_type in ['resource_relations', 'entity', 'resource', 'maplayers']:
        se.delete_index(index=index_type)
    # remove only term docs missing value.options.conceptid (i.e. resource
    # terms); concept terms are left in place
    se.delete(index='term', body='{"query":{"bool":{"must":[{"constant_score":{"filter":{"missing":{"field":"value.options.conceptid"}}}}],"must_not":[],"should":[]}}}')
    Resource().prepare_term_index(create=True)

    cursor = connection.cursor()
    cursor.execute("""select entitytypeid from data.entity_types where isresource = TRUE""")
    resource_types = cursor.fetchall()
    Resource().prepare_resource_relations_index(create=True)

    # recreate one search mapping per resource type before reindexing
    for resource_type in resource_types:
        Resource().prepare_search_index(resource_type[0], create=True)

    # presumably fills result_summary with per-type 'database' counts as a
    # side effect — TODO confirm against index_resources_by_type
    index_resources_by_type(resource_types, result_summary)

    # make the freshly indexed documents visible before counting them
    se.es.indices.refresh(index='entity')
    for resource_type in resource_types:
        result_summary[resource_type[0]]['indexed'] = se.es.count(index="entity", doc_type=resource_type[0])['count']

    # report database vs. indexed counts per type (Python 2 print statements)
    print '\nResource Index Results:'
    for k, v in result_summary.iteritems():
        status = 'Passed' if v['database'] == v['indexed'] else 'failed'
        print "Status: {0}, Resource Type: {1}, In Database: {2}, Indexed: {3}".format(status, k, v['database'], v['indexed'])
def map_layers(request, entitytypeid='all', get_centroids=False):
    """
    Returns a GeoJSON FeatureCollection of indexed map-layer features.
    Specific entity ids may be requested via the 'entityid' querystring
    parameter; otherwise the maplayers index is searched (optionally scoped
    to one entity type). Geometry is reduced to centroids when
    get_centroids is True, or swapped for the geometry named by the 'geom'
    parameter when supplied.
    """
    geom_param = request.GET.get('geom', None)
    bbox = request.GET.get('bbox', '')  # read but currently unused
    limit = request.GET.get('limit', settings.MAP_LAYER_FEATURE_LIMIT)
    entityids = request.GET.get('entityid', '')

    geojson_collection = {"type": "FeatureCollection", "features": []}
    features = geojson_collection['features']

    se = SearchEngineFactory().create()
    query = Query(se, limit=limit)

    search_args = {'index': 'maplayers'}
    if entitytypeid != 'all':
        search_args['doc_type'] = entitytypeid

    # explicit entity ids bypass the search entirely
    if entityids != '':
        for entityid in entityids.split(','):
            features.append(se.search(index='maplayers', id=entityid)['_source'])
        return JSONResponse(geojson_collection)

    for item in query.search(**search_args)['hits']['hits']:
        source = item['_source']
        if get_centroids:
            # replace the geometry with its centroid and drop everything else
            source['geometry'] = source['properties']['centroid']
            source.pop('properties', None)
        elif geom_param is not None:
            # swap in the requested named geometry, trimming bulky fields
            source['geometry'] = source['properties'][geom_param]
            source['properties'].pop('extent', None)
            source['properties'].pop(geom_param, None)
        else:
            source['properties'].pop('extent', None)
            source['properties'].pop('centroid', None)
        features.append(source)

    return JSONResponse(geojson_collection)
def forwards_func(apps, schema_editor):
    """
    Migration: rebuilds the resource_relations index with the current
    mapping by round-tripping its documents through a temporary index
    (old 'all' doc type -> new '_doc').
    """
    se = SearchEngineFactory().create()
    prefix = settings.ELASTICSEARCH_PREFIX
    source_index = "%s_resource_relations" % prefix
    temp_index = "%s_resource_relations_temp" % prefix

    if not se.es.indices.exists(index=source_index):
        return

    # stage 1: copy existing docs into a temp index with the current mapping
    index_settings = prepare_resource_relations_index(create=False)
    se.create_index(index='resource_relations_temp', body=index_settings)
    se.es.reindex(
        body={
            "source": {"index": source_index, "type": "all"},
            "dest": {"index": temp_index, "type": "_doc"},
        },
        refresh=True,
        wait_for_completion=True,
    )

    # stage 2: recreate the real index and copy the docs back
    se.delete_index(index='resource_relations')
    prepare_resource_relations_index(create=True)
    se.es.reindex(
        body={
            "source": {"index": temp_index, "type": "_doc"},
            "dest": {"index": source_index, "type": "_doc"},
        },
        refresh=True,
        wait_for_completion=True,
    )
def test_bulk_delete(self):
    """ Test bulk deleting of documents in Elasticsearch """
    se = SearchEngineFactory().create()

    # seed 10 prefLabel docs and 10 altLabel docs
    for i in range(10):
        pref_doc = {
            'id': i,
            'type': 'prefLabel',
            'value': 'test pref label',
        }
        se.index_data(index='test', doc_type='test', body=pref_doc, idfield='id', refresh=True)
        alt_doc = {
            'id': i + 100,
            'type': 'altLabel',
            'value': 'test alt label',
        }
        se.index_data(index='test', doc_type='test', body=alt_doc, idfield='id', refresh=True)

    # bulk-delete every altLabel doc; only the 10 prefLabels should remain
    query = Query(se, start=0, limit=100)
    query.add_query(Match(field='type', query='altLabel'))
    query.delete(index='test', refresh=True)

    self.assertEqual(se.es.count(index='test', doc_type='test')['count'], 10)
def delete(self, user=None, note=''):
    """
    Deletes a single resource and any related indexed data

    Keyword Arguments:
    user -- the user requesting the delete; accepted for interface
            compatibility but not read in this implementation.
            BUG FIX: the default was a shared mutable ``{}`` (classic
            mutable-default pitfall); ``None`` is behavior-identical here
            because the value is never used.
    note -- edit-log note; accepted for interface compatibility, unused here
    """
    se = SearchEngineFactory().create()

    # remove every relationship record pointing at this resource
    related_resources = self.get_related_resources(lang="en-US", start=0, limit=1000)
    for rr in related_resources['resource_relationships']:
        models.ResourceXResource.objects.get(pk=rr['resourcexid']).delete()

    # remove this resource's term documents from the strings index
    query = Query(se)
    bool_query = Bool()
    bool_query.filter(Terms(field='resourceinstanceid', terms=[self.resourceinstanceid]))
    query.add_query(bool_query)
    results = query.search(index='strings', doc_type='term')['hits']['hits']
    for result in results:
        se.delete(index='strings', doc_type='term', id=result['_id'])

    # remove the resource document itself, log the edit, then delete the row
    se.delete(index='resource', doc_type=str(self.graph_id), id=self.resourceinstanceid)
    self.save_edit(edit_type='delete')
    super(Resource, self).delete()
def search_results(request):
    """
    Executes a resource search assembled from the request's querystring
    filters and returns the Elasticsearch results plus any extras the
    filters attach. (Python 2 code: note dict.items() concatenation and
    err.message below.)
    """
    se = SearchEngineFactory().create()
    search_results_object = {
        'query': Query(se)
    }

    include_provisional = get_provisional_type(request)
    permitted_nodegroups = get_permitted_nodegroups(request.user)
    search_filter_factory = SearchFilterFactory(request)
    try:
        # the synthetic ('search-results', '') entry guarantees the core
        # search-results filter always contributes its DSL
        for filter_type, querystring in request.GET.items() + [('search-results', '')]:
            search_filter = search_filter_factory.get_filter(filter_type)
            if search_filter:
                search_filter.append_dsl(search_results_object, permitted_nodegroups, include_provisional)
    except Exception as err:
        return JSONResponse(err.message, status=500)

    # limit the _source fields returned for each hit
    dsl = search_results_object.pop('query', None)
    dsl.include('graph_id')
    dsl.include('root_ontology_class')
    dsl.include('resourceinstanceid')
    dsl.include('points')
    dsl.include('geometries')
    dsl.include('displayname')
    dsl.include('displaydescription')
    dsl.include('map_popup')
    dsl.include('provisional_resource')
    if request.GET.get('tiles', None) is not None:
        dsl.include('tiles')

    results = dsl.search(index='resources')

    if results is not None:
        # allow filters to modify the results
        for filter_type, querystring in request.GET.items() + [('search-results', '')]:
            search_filter = search_filter_factory.get_filter(filter_type)
            if search_filter:
                search_filter.post_search_hook(search_results_object, results, permitted_nodegroups)
        ret = {}
        ret['results'] = results

        # pass through anything the filters left in search_results_object
        for key, value in search_results_object.items():
            ret[key] = value

        ret['reviewer'] = request.user.groups.filter(name='Resource Reviewer').exists()
        ret['timestamp'] = datetime.now()

        return JSONResponse(ret)
    else:
        return HttpResponseNotFound(_("There was an error retrieving the search results"))
def prepare_resource_relations_index(create=False):
    """
    Builds the Elasticsearch settings/mappings for the resource-to-resource
    relations index (keyword/text field types) and, when requested, creates
    the index and reindexes the relationship-types concept scheme.

    Keyword Arguments:
    create -- when True, create the 'resource_relations' index (HTTP 400
              "already exists" responses are ignored) and index the
              relationship-types concept

    Returns the mapping definition as a dict.
    """
    # id/type fields are exact-match keywords; notes stay full-text
    properties = {
        name: {'type': 'keyword'}
        for name in ('resourcexid', 'relationshiptype',
                     'resourceinstanceidfrom', 'resourceinstanceidto')
    }
    properties['notes'] = {'type': 'text'}

    index_settings = {'mappings': {'all': {'properties': properties}}}

    if create:
        se = SearchEngineFactory().create()
        se.create_index(index='resource_relations', body=index_settings, ignore=400)
        # (re)index the resource-relationship-types concept scheme
        concept = Concept('00000000-0000-0000-0000-000000000007')
        concept.index()

    return index_settings
def delete(self, request, resourceid=None): lang = request.GET.get('lang', settings.LANGUAGE_CODE) se = SearchEngineFactory().create() req = dict(request.GET) ids_to_delete = req['resourcexids[]'] root_resourceinstanceid = req['root_resourceinstanceid'] for resourcexid in ids_to_delete: try: ret = models.ResourceXResource.objects.get(pk=resourcexid).delete() except: print 'resource relation does not exist' start = request.GET.get('start', 0) se.es.indices.refresh(index=se._add_prefix("resource_relations")) resource = Resource.objects.get(pk=root_resourceinstanceid[0]) page = 1 if request.GET.get('page') == '' else int(request.GET.get('page', 1)) related_resources = resource.get_related_resources(lang=lang, start=start, limit=1000, page=page) ret = [] if related_resources is not None: ret = self.paginate_related_resources(related_resources, page, request) return JSONResponse(ret, indent=4)
def delete(self, request, resourceid=None):
    """
    Deletes the resource relationships listed in the 'resourcexids[]'
    querystring parameter, refreshes the relations index, and returns the
    paginated related resources remaining for the root resource instance.
    """
    lang = request.GET.get("lang", settings.LANGUAGE_CODE)
    se = SearchEngineFactory().create()
    params = dict(request.GET)
    relation_ids = params["resourcexids[]"]
    root_resourceinstanceid = params["root_resourceinstanceid"]

    for relation_id in relation_ids:
        try:
            models.ResourceXResource.objects.get(pk=relation_id).delete()
        except ObjectDoesNotExist:
            logger.exception(_("Unable to delete. Relationship does not exist"))

    start = request.GET.get("start", 0)
    # make the deletions visible before re-querying related resources
    se.es.indices.refresh(index=se._add_prefix("resource_relations"))

    resource = Resource.objects.get(pk=root_resourceinstanceid[0])
    raw_page = request.GET.get("page", 1)
    page = 1 if raw_page == "" else int(raw_page)
    related_resources = resource.get_related_resources(lang=lang, start=start, limit=1000, page=page)

    if related_resources is None:
        return JSONResponse([], indent=4)
    return JSONResponse(self.paginate_related_resources(related_resources, page, request), indent=4)
def get_related_resources(resourceid, lang='en-US', limit=1000, start=0):
    """
    Looks up resource relationships where the given resource is the
    'entityid1' side.

    Keyword Arguments:
    lang -- accepted for interface compatibility; not used in this lookup
    limit/start -- paging controls passed to the search query

    Returns a dict with 'resource_relationships' (the matching relation
    documents), an empty 'related_resources' list, and 'total'.
    """
    se = SearchEngineFactory().create()
    query = Query(se, limit=limit, start=start)
    query.add_filter(Terms(field='entityid1', terms=resourceid).dsl, operator='or')
    hits = query.search(index='resource_relations', doc_type='all')['hits']

    return {
        'resource_relationships': [relation['_source'] for relation in hits['hits']],
        'related_resources': [],
        'total': hits['total'],
    }
def delete(self, *args, **kwargs):
    """
    Deletes this tile (and its child tiles) and removes its term documents
    from the search index. If the acting user is not a reviewer and does not
    own the provisional data, the delete is recorded as a provisional edit
    instead of a hard delete.

    Keyword Arguments (popped from kwargs):
    request -- the current request; used to resolve the acting user
    provisional_edit_log_details -- extra detail stored with the edit-log entry
    """
    se = SearchEngineFactory().create()
    request = kwargs.pop('request', None)
    provisional_edit_log_details = kwargs.pop(
        'provisional_edit_log_details', None)

    # delete child tiles first so their index/doc cleanup runs
    for tile in self.tiles:
        tile.delete(*args, request=request, **kwargs)

    try:
        user = request.user
        user_is_reviewer = request.user.groups.filter(
            name='Resource Reviewer').exists()
    except AttributeError:  # no user (system-initiated delete)
        user = None
        # BUG FIX: user_is_reviewer was never assigned on this path, causing
        # an UnboundLocalError at the check below whenever no request/user
        # was supplied. Treat system deletes as reviewer deletes (matches
        # the later version of this method).
        user_is_reviewer = True

    if user_is_reviewer is True or self.user_owns_provisional(user):
        # remove this tile's term documents from the strings index
        query = Query(se)
        bool_query = Bool()
        bool_query.filter(Terms(field='tileid', terms=[self.tileid]))
        query.add_query(bool_query)
        results = query.search(index='strings', doc_type='term')['hits']['hits']
        for result in results:
            se.delete(index='strings', doc_type='term', id=result['_id'])

        self.__preDelete(request)
        # NOTE(review): uses request.user directly — raises AttributeError if
        # request is None on this path; preserved from the original behavior.
        self.save_edit(
            user=request.user,
            edit_type='tile delete',
            old_value=self.data,
            provisional_edit_log_details=provisional_edit_log_details)
        super(Tile, self).delete(*args, **kwargs)
        resource = Resource.objects.get(
            resourceinstanceid=self.resourceinstance.resourceinstanceid)
        resource.index()
    else:
        # non-reviewer without ownership: record a provisional delete instead
        self.apply_provisional_edit(user, data={}, action='delete')
        super(Tile, self).save(*args, **kwargs)
def search_results(request):
    """
    Executes a resource search assembled from the request's querystring
    filters and returns the Elasticsearch results, a total-results count,
    and any extras the filters attach.
    """
    se = SearchEngineFactory().create()
    search_results_object = {"query": Query(se)}

    include_provisional = get_provisional_type(request)
    permitted_nodegroups = get_permitted_nodegroups(request.user)
    search_filter_factory = SearchFilterFactory(request)

    # the synthetic ("search-results", "") entry guarantees the core
    # search-results filter always runs
    filter_items = list(request.GET.items()) + [("search-results", "")]
    try:
        for filter_type, querystring in filter_items:
            search_filter = search_filter_factory.get_filter(filter_type)
            if search_filter:
                search_filter.append_dsl(search_results_object, permitted_nodegroups, include_provisional)
    except Exception as err:
        return JSONResponse(err, status=500)

    # limit the _source fields returned for each hit
    dsl = search_results_object.pop("query", None)
    for field in (
        "graph_id",
        "root_ontology_class",
        "resourceinstanceid",
        "points",
        "geometries",
        "displayname",
        "displaydescription",
        "map_popup",
        "provisional_resource",
    ):
        dsl.include(field)
    if request.GET.get("tiles", None) is not None:
        dsl.include("tiles")

    results = dsl.search(index="resources")
    if results is None:
        return HttpResponseNotFound(_("There was an error retrieving the search results"))

    # allow filters to modify the results
    for filter_type, querystring in filter_items:
        search_filter = search_filter_factory.get_filter(filter_type)
        if search_filter:
            search_filter.post_search_hook(search_results_object, results, permitted_nodegroups)

    ret = {"results": results}
    # pass through anything the filters left in search_results_object
    for key, value in list(search_results_object.items()):
        ret[key] = value
    ret["reviewer"] = request.user.groups.filter(name="Resource Reviewer").exists()
    ret["timestamp"] = datetime.now()
    ret["total_results"] = dsl.count(index="resources")
    return JSONResponse(ret)
def index_resource_relations(clear_index=True, batch_size=settings.BULK_IMPORT_BATCH_SIZE): """ Indexes all resource to resource relation records Keyword Arguments: clear_index -- set to True to remove all the resources from the index before the reindexing operation batch_size -- the number of records to index as a group, the larger the number to more memory required """ start = datetime.now() print "Indexing resource to resource relations" cursor = connection.cursor() se = SearchEngineFactory().create() if clear_index: q = Query(se=se) q.delete(index='resource_relations') with se.BulkIndexer(batch_size=batch_size, refresh=True) as resource_relations_indexer: sql = """ SELECT resourcexid, resourceinstanceidfrom, notes, relationshiptype, resourceinstanceidto FROM public.resource_x_resource; """ cursor.execute(sql) for resource_relation in cursor.fetchall(): doc = { 'resourcexid': resource_relation[0], 'resourceinstanceidfrom': resource_relation[1], 'notes': resource_relation[2], 'relationshiptype': resource_relation[3], 'resourceinstanceidto': resource_relation[4] } resource_relations_indexer.add(index='resource_relations', id=doc['resourcexid'], data=doc) index_count = se.count(index='resource_relations') print "Status: {0}, In Database: {1}, Indexed: {2}, Took: {3} seconds".format('Passed' if cursor.rowcount == index_count else 'Failed', cursor.rowcount, index_count, (datetime.now()-start).seconds)
def bulk_save(resources):
    """
    Saves and indexes a list of resources

    Arguments:
    resources -- a list of resource models
    """
    se = SearchEngineFactory().create()
    datatype_factory = DataTypeFactory()
    # nodeid -> datatype lookup passed to the document builder
    node_datatypes = {
        str(nodeid): datatype
        for nodeid, datatype in models.Node.objects.values_list(
            'nodeid', 'datatype')
    }
    tiles = []
    documents = []
    term_list = []

    # flatten out the nested tiles into a single array
    # NOTE: resource.tiles is extended while being iterated; the newly
    # appended child tiles are visited by the outer loop too, but their own
    # .tiles dicts get cleared, so the walk terminates.
    for resource in resources:
        for parent_tile in resource.tiles:
            for child_tile in parent_tile.tiles.itervalues():
                if len(child_tile) > 0:
                    resource.tiles.extend(child_tile)
            parent_tile.tiles = {}
        tiles.extend(resource.tiles)

    # need to save the models first before getting the documents for index
    Resource.objects.bulk_create(resources)
    TileModel.objects.bulk_create(tiles)

    for resource in resources:
        resource.save_edit(edit_type='create')
        # fetchTiles=False: the (already flattened) in-memory tiles are used
        document, terms = resource.get_documents_to_index(
            fetchTiles=False, datatype_factory=datatype_factory,
            node_datatypes=node_datatypes)
        document['root_ontology_class'] = resource.get_root_ontology()
        documents.append(
            se.create_bulk_item(index='resource',
                                doc_type=document['graph_id'],
                                id=document['resourceinstanceid'],
                                data=document))
        for term in terms:
            term_list.append(
                se.create_bulk_item(index='strings', doc_type='term',
                                    id=term['_id'], data=term['_source']))

    for tile in tiles:
        tile.save_edit(edit_type='tile create', new_value=tile.data)

    # bulk index the resources, tiles and terms
    se.bulk_index(documents)
    se.bulk_index(term_list)
def index(self, scheme=None):
    """
    Indexes a concept label into the 'concept_labels' index and, unless the
    label belongs to an entity-type scheme or is a dropdown/entity label,
    into the term index as well. Non-label values are ignored.

    Keyword Arguments:
    scheme -- the concept scheme to index under; derived from the label
              itself when not supplied

    Raises Exception when no scheme can be derived.
    """
    if self.category != 'label':
        return

    se = SearchEngineFactory().create()
    data = JSONSerializer().serializeToPython(self)
    if scheme is None:
        scheme = self.get_scheme_id()
    if scheme is None:
        raise Exception('Index of label failed. Index type (scheme id) could not be derived from the label.')

    se.create_mapping('concept_labels', scheme.id, fieldname='conceptid',
                      fieldtype='string', fieldindex='not_analyzed')
    se.index_data('concept_labels', scheme.id, data, 'id')

    # dropdown labels and entity labels are excluded from the term index
    is_entity_or_dropdown = archesmodels.ConceptRelations.objects.filter(
        Q(relationtype='hasCollection') | Q(relationtype='hasEntity'),
        conceptidto=scheme.id).count() > 0

    # don't create terms for entity type concepts
    entity_type_schemes = ('00000000-0000-0000-0000-000000000003',
                           '00000000-0000-0000-0000-000000000004')
    if scheme.id not in entity_type_schemes and not is_entity_or_dropdown:
        se.index_term(self.value, self.id, scheme.id, {'conceptid': self.conceptid})
def get_search_contexts(request):
    """
    Resolves each entry in settings.SEARCH_TERMS to its concept id and
    context by querying the term index, grouping results by context_key /
    text_key. The assembled mapping is cached for 24 hours under the
    'search_contexts' cache key. (Python 2 code: print statements and the
    deprecated ``<>`` operator below; the commented-out prints are leftover
    debug output, some in Slovene.)
    """
    search_context = {}
    # serve from cache when available
    search_context = cache.get('search_contexts')
    if search_context is not None:
        #print 'Search_context iz cacha!'  # (Slovene: "search_context from cache!")
        return search_context
    lang = request.GET.get('lang', settings.LANGUAGE_CODE)
    se1 = SearchEngineFactory().create()
    context_label1 = '-'  # sentinel so the first term always starts a new group
    search_context = {}
    for search_term in settings.SEARCH_TERMS:
        searchString1 = search_term['text']
        print searchString1
        # fuzzy prefix search over the term index for this search string
        query1 = Query(se1, start=0, limit=settings.SEARCH_DROPDOWN_LENGTH)
        boolquery1 = Bool()
        boolquery1.should(Match(field='term', query=searchString1.lower(), type='phrase_prefix', fuzziness='AUTO'))
        boolquery1.should(Match(field='term.folded', query=searchString1.lower(), type='phrase_prefix', fuzziness='AUTO'))
        boolquery1.should(Match(field='term.folded', query=searchString1.lower(), fuzziness='AUTO'))
        query1.add_query(boolquery1)
        results1 = query1.search(index='term', doc_type='value')
        conceptid1 = ''
        context1 = ''
        # keep the (last) hit whose context label and term text match exactly
        for result1 in results1['hits']['hits']:
            prefLabel = get_preflabel_from_conceptid(result1['_source']['context'], lang)
            result1['_source']['options']['context_label'] = prefLabel['value']
            if (prefLabel['value'] == search_term['context_label'] and result1['_source']['term'] == search_term['text']):
                conceptid1 = result1['_source']['options']['conceptid']
                context1 = result1['_source']['context']
                #print search_term['context_label'] + ': ' + conceptid1
                #print searchString1
                #print result1
        result = {'conceptid': conceptid1, 'context': context1}
        # start a fresh group whenever the context label changes
        # (``<>`` is the Python 2 inequality operator)
        if context_label1 <> search_term['context_label']:
            value = {}
        print result
        value[search_term['text_key']] = result
        #print value
        search_context[search_term['context_key']] = value
        #print search_context
        #print 'Iscem [' + search_term['context_label'] + '][' + search_term['text'] + ']'  # (Slovene: "searching for ...")
        #print value
        context_label1 = search_term['context_label']
    #print search_context
    #print search_context['Historical_Period']['BRONZE_AGE']
    #print 'Shranjujem search_context v cache'  # (Slovene: "saving search_context to cache")
    # cache for 24 hours
    cache.set('search_contexts', search_context, 86400)
    return search_context