def _get_child_concepts(conceptid):
    """Return the labelids of every concept at or below ``conceptid``.

    First fetches the concept's nested-set bounds (left/right) from the
    'concept' index, then range-queries for all concepts whose ``left``
    value falls inside those bounds (i.e. the concept and all of its
    descendants), collecting each of their label ids.
    """
    engine = SearchEngineFactory().create()
    lookup = engine.search(conceptid, index='concept', type='all',
                           search_field='conceptid', use_phrase=True)
    source = lookup['hits']['hits'][0]['_source']
    descendants = engine.search({'from': source['left'], 'to': source['right']},
                                index='concept', type='all',
                                search_field='left', use_range=True)
    return [label['labelid']
            for hit in descendants['hits']['hits']
            for label in hit['_source']['labels']]
def search_terms(request):
    """Fuzzy-search the 'term' index for the string in the ``q`` GET param.

    The query is lower-cased before searching; results are returned as a
    JSON HttpResponse (non-ASCII preserved).
    """
    engine = SearchEngineFactory().create()
    query = request.GET['q'].lower()
    results = engine.search(query, index='term', type='value',
                            search_field='term', use_fuzzy=True)
    return HttpResponse(JSONSerializer().serialize(results, ensure_ascii=False))
def MapLayers(request, entitytypeid):
    """Return up to ``limit`` map-layer documents for the given entity type.

    GET params:
        bbox  -- required (read but not currently applied to the query)
        limit -- optional maximum number of hits, default 10000

    Best-effort view: any search-engine failure yields an empty result
    list rather than an error response, so the map still renders.
    """
    data = []
    bbox = request.GET['bbox']
    try:
        # GET values arrive as strings; coerce so end_offset is numeric
        # (the default 10000 is already an int).
        limit = int(request.GET.get('limit', 10000))
    except (TypeError, ValueError):
        limit = 10000
    try:
        se = SearchEngineFactory().create()
        data = se.search('', index="maplayers", type=entitytypeid, end_offset=limit)
    except Exception:
        # Deliberate best-effort: swallow search failures (was a bare
        # except) and fall through with an empty list.
        pass
    return HttpResponse(JSONSerializer().serialize(data, ensure_ascii=True, indent=4))
def Search(request):
    """Phrase-search the 'entity' index using the ``q`` GET parameter.

    Spatial hits are normalized to WKT before serialization.
    """
    engine = SearchEngineFactory().create()
    query = request.GET['q'].lower()
    raw_results = engine.search(query, index='entity', type='',
                                search_field='strings', use_phrase=True)
    search_results = _normalize_spatial_results_to_wkt(raw_results)
    return HttpResponse(JSONSerializer().serialize(search_results, ensure_ascii=False))
def Concept(request, ids):
    """Return one or more concept graphs, from the database or search index.

    ``ids`` is a comma-delimited list. An id containing ".E" is treated as
    an EntityTypes primary key whose related concept is used. With
    ``fromdb`` the graphs are built from the database; otherwise the
    'concept' index is wildcard-searched. ``emulate_elastic_search`` wraps
    database results to look like an elasticsearch hits payload.
    """
    def flag(name, default):
        # All toggles arrive as the strings 'true'/'false'.
        return request.GET.get(name, default) == 'true'

    full_graph = flag('full_graph', 'true')
    exclude_subconcepts = flag('exclude_subconcepts', 'false')
    exclude_parentconcepts = flag('exclude_parentconcepts', 'false')
    exclude_notes = flag('exclude_notes', 'false')
    exclude_labels = flag('exclude_labels', 'false')
    exclude_metadata = flag('exclude_metadata', 'false')
    emulate_elastic_search = flag('emulate_elastic_search', 'true')
    fromdb = flag('fromdb', 'false')

    ret = []
    if request.method == 'GET':
        if fromdb:
            for concept_id in ids.split(','):
                if ".E" in concept_id:
                    entitytype = archesmodels.EntityTypes.objects.get(pk=concept_id)
                    concept = entitytype.conceptid
                else:
                    concept = archesmodels.Concepts.objects.get(conceptid=concept_id)
                concept_graph = concept.toObject(
                    full_graph=full_graph,
                    exclude_subconcepts=exclude_subconcepts,
                    exclude_parentconcepts=exclude_parentconcepts,
                    exclude_notes=exclude_notes,
                    exclude_labels=exclude_labels,
                    exclude_metadata=exclude_metadata)
                if emulate_elastic_search:
                    ret.append({'_type': concept_id, '_source': concept_graph})
                else:
                    ret.append(concept_graph)
            if emulate_elastic_search:
                ret = {'hits': {'hits': ret}}
        else:
            se = SearchEngineFactory().create()
            ret = se.search('', index='concept', type=ids,
                            search_field='value', use_wildcard=True)
    return HttpResponse(JSONSerializer().serialize(ret, ensure_ascii=True, indent=4))
def index_concepts_for_search(self):
    """Index the entire concept tree into the 'concept' search index.

    Walks the hierarchy down from the root "Concepts" value, assigning
    each concept nested-set bounds (left/right) so that all descendants of
    a concept can later be fetched with a single range query on ``left``.
    """
    # Nested-set bookkeeping (adjacency list -> nested sets); see
    # http://sqlblog.com/blogs/adam_machanic/archive/2006/07/12/swinging-from-tree-to-tree-using-ctes-part-1-adjacency-to-nested-sets.aspx
    # Value of Lft for the root node is 1
    # Value of Rgt for the root node is 2 * (Number of nodes)
    # Value of Lft for any node is ((Number of nodes visited) * 2) - (Level of current node)
    # Value of Rgt for any node is (Lft value) + ((Number of subnodes) * 2) + 1
    # Deep concept trees can blow the default recursion limit.
    sys.setrecursionlimit(3000)
    se = SearchEngineFactory().create()
    # Ids must stay un-tokenized or exact-match lookups against them fail.
    se.create_mapping('concept', 'all', 'conceptid', 'string', 'not_analyzed')
    se.create_mapping('concept', 'all', 'labelid', 'string', 'not_analyzed')

    def _findNarrowerConcept(conceptid, ret=None, limit=200000, level=1):
        # Recursively visit narrower (child) concepts, accumulating every
        # concept into the shared ``ret`` dict keyed by conceptid, with its
        # labels and left/right nested-set values.
        returnobj = {'subnodes': 0}
        if ret == None:
            # the root node
            labels = archesmodels.Values.objects.filter(conceptid = conceptid)
            ret = {}
            nodesvisited = len(ret) + 1
            ret[conceptid] = {'labels': [], 'left': (nodesvisited*2)-level, 'right': 0}
            for label in labels:
                ret[conceptid]['labels'].append({'labelid': label.pk, 'label': label.value})
            level = level + 1
        conceptrealations = archesmodels.ConceptRelations.objects.filter(conceptidfrom = conceptid)
        for relation in conceptrealations:
            nodesvisited = len(ret) + 1
            labels = archesmodels.Values.objects.filter(conceptid = relation.conceptidto)
            ret[relation.conceptidto_id] = {'labels': [], 'left': (nodesvisited*2)-level, 'right': 0}
            for label in labels:
                ret[relation.conceptidto_id]['labels'].append({'labelid': label.pk, 'label': label.value})
            returnobj = _findNarrowerConcept(relation.conceptidto_id, ret=ret, level=level+1)
        subnodes = returnobj['subnodes']
        if subnodes == 0:
            # meaning we're at a leaf node
            ret[conceptid]['right'] = ret[conceptid]['left'] + 1
        else:
            ret[conceptid]['right'] = subnodes + 1
        return {'all_concepts': ret, 'subnodes': ret[conceptid]['right']}

    label = archesmodels.Values.objects.get(value = "Concepts")
    concepts = _findNarrowerConcept(label.conceptid_id)
    all_concepts = []
    # Flatten the accumulated dict into one document per concept.
    for key, concept in concepts['all_concepts'].iteritems():
        all_concepts.append({'conceptid': key, 'labels': concept['labels'], 'left': concept['left'], 'right': concept['right']})
    self.index(all_concepts, 'concept', 'all', 'conceptid')
def delete_index(self):
    """
    removes an entity from the search index
    assumes that self is an asset entity
    """
    # Only the root (rank 0) entity of a graph drives index deletion.
    if self.get_rank() == 0:
        se = SearchEngineFactory().create()

        def delete_indexes(entity):
            # Traverse callback, applied to every node of the entity graph.
            if entity.get_rank() == 0:
                # Root node: remove the main entity document.
                se.delete(index='entity', type=entity.entitytypeid, id=entity.entityid)
            if entity.entitytypeid in settings.ENTITY_TYPE_FOR_MAP_DISPLAY:
                # NOTE(review): type is deliberately self.entitytypeid (the
                # root's type), not entity's — maplayers documents appear to
                # be indexed under the root entity's type with the geometry
                # entity's id; confirm against the index() method.
                se.delete(index='maplayers', type=self.entitytypeid, id=entity.entityid)
            if entity.entitytypeid in settings.SEARCHABLE_ENTITY_TYPES:
                se.delete_terms(entity)

        entity = Entity().get(self.entityid)
        entity.traverse(delete_indexes)
def Concept(request, ids):
    """Return concept graph(s) for a comma-delimited list of ids.

    GET flags (all passed as the strings 'true'/'false'):
        full_graph, exclude_subconcepts, exclude_parentconcepts,
        exclude_notes, exclude_labels, exclude_metadata -- control how much
        of each concept graph is serialized.
        emulate_elastic_search -- wrap database results to mimic an
        elasticsearch hits payload.
        fromdb -- build graphs from the database; otherwise wildcard-search
        the 'concept' index.
    """
    full_graph = request.GET.get('full_graph', 'true') == 'true'
    exclude_subconcepts = request.GET.get('exclude_subconcepts', 'false') == 'true'
    exclude_parentconcepts = request.GET.get('exclude_parentconcepts', 'false') == 'true'
    exclude_notes = request.GET.get('exclude_notes', 'false') == 'true'
    exclude_labels = request.GET.get('exclude_labels', 'false') == 'true'
    exclude_metadata = request.GET.get('exclude_metadata', 'false') == 'true'
    emulate_elastic_search = request.GET.get('emulate_elastic_search', 'true') == 'true'
    fromdb = request.GET.get('fromdb', 'false') == 'true'
    ret = []
    if request.method == 'GET':
        if fromdb:
            for id in ids.split(','):
                # Ids containing ".E" are EntityTypes pks; resolve to the
                # related concept.
                if ".E" in id:
                    entitytype = archesmodels.EntityTypes.objects.get(pk = id)
                    concept = entitytype.conceptid
                else:
                    concept = archesmodels.Concepts.objects.get(conceptid = id)
                concept_graph = concept.toObject(full_graph=full_graph, exclude_subconcepts=exclude_subconcepts, exclude_parentconcepts=exclude_parentconcepts, exclude_notes=exclude_notes, exclude_labels=exclude_labels, exclude_metadata=exclude_metadata)
                if emulate_elastic_search:
                    ret.append({'_type': id, '_source': concept_graph})
                else:
                    ret.append(concept_graph)
            if emulate_elastic_search:
                ret = {'hits':{'hits':ret}}
        else:
            se = SearchEngineFactory().create()
            ret = se.search('', index='concept', type=ids, search_field='value', use_wildcard=True)
    return HttpResponse(JSONSerializer().serialize(ret, ensure_ascii=True, indent=4))
def index(self, documents, index, type, idfield, processdoc=None, getid=None, bulk=False):
    """Index one or more documents into the search engine.

    Arguments:
        documents  -- a single document or a list of documents
        index/type -- target search-engine index and document type
        idfield    -- field used as the document id when ``getid`` is absent
        processdoc -- optional callable transforming a document before indexing
        getid      -- optional callable(document, data) returning the doc id
        bulk       -- when True, accumulate items and submit one bulk request

    Best-effort: per-document failures are collected rather than raised;
    the ids (or bulk items) that failed are printed at the end.
    """
    # Track the most recent failure explicitly instead of relying on the
    # except-clause variable surviving the block (it doesn't in Python 3).
    last_error = None
    bulkitems = []
    errorlist = []
    se = SearchEngineFactory().create()
    if not isinstance(documents, list):
        documents = [documents]
    for document in documents:
        # Progress marker: one dot per document.
        sys.stdout.write('.')
        data = document if processdoc is None else processdoc(document)
        id = None if getid is None else getid(document, data)
        try:
            if bulk:
                bulkitem = se.create_bulk_item(index, type, id, data)
                bulkitems.append(bulkitem[0])
                bulkitems.append(bulkitem[1])
            else:
                se.index_data(index, type, data, idfield=idfield, id=id)
        except Exception as e:
            # Keep indexing the remaining documents; report failures later.
            last_error = e
            errorlist.append(id)
    if bulk:
        try:
            se.bulk_index(index, type, bulkitems)
        except Exception as e:
            last_error = e
            errorlist = bulkitems
            print('bulk insert failed')  # fixed typo (was "bulk inset failed")
    if last_error is not None:
        print("\n\nException detail: %s " % (last_error))
        print("There was a problem indexing the following items:")
        print(errorlist)
def delete_index(self, index):
    """Drop the named index from the search engine entirely (force=True)."""
    engine = SearchEngineFactory().create()
    engine.delete(index=index, force=True)
def index(self):
    """
    Gets a SearchResult object for a given asset entity
    Used for populating the search index with searchable entity information
    """
    # Only the root (rank 0) entity of a graph is indexed.
    if self.get_rank() == 0:
        se = SearchEngineFactory().create()
        search_result = {}
        search_result['entityid'] = self.entityid
        search_result['entitytypeid'] = self.entitytypeid
        search_result['strings'] = []
        search_result['geometries'] = []
        search_result['concepts'] = []
        term_entities = []
        names = []
        for name in self.get_primary_display_name():
            names.append(name.value)
        primary_display_name = ' '.join(names)
        search_result['primaryname'] = primary_display_name
        # Collect values of all searchable child entities; the entities
        # themselves are kept for term indexing at the end.
        for enititytype in settings.SEARCHABLE_ENTITY_TYPES:
            for entity in self.find_entities_by_type_id(enititytype):
                search_result['strings'].append(entity.value)
                term_entities.append(entity)
        # Each geometry becomes a 'maplayers' document keyed by the
        # geometry entity's id, indexed under the root entity's type.
        for geom_entity in self.find_entities_by_type_id(
                settings.ENTITY_TYPE_FOR_MAP_DISPLAY):
            search_result['geometries'].append(
                fromstr(geom_entity.value).json)
            mapfeature = MapFeature()
            mapfeature.geomentityid = geom_entity.entityid
            mapfeature.entityid = self.entityid
            mapfeature.entitytypeid = self.entitytypeid
            mapfeature.primaryname = primary_display_name
            mapfeature.geometry = geom_entity.value
            data = JSONSerializer().serializeToPython(mapfeature, ensure_ascii=True, indent=4)
            se.index_data('maplayers', self.entitytypeid, data, idfield='geomentityid')

        def to_int(s):
            # NOTE(review): helper appears unused within this method.
            try:
                return int(s)
            except ValueError:
                return ''

        def inspect_node(entity):
            # Traverse callback: bucket each searchable node's value under
            # its entity type id; geometries are stored as GeoJSON dicts.
            if entity.entitytypeid in settings.ADV_SEARCHABLE_ENTITY_TYPES or entity.entitytypeid in settings.SEARCHABLE_ENTITY_TYPES:
                if entity.entitytypeid not in search_result:
                    search_result[entity.entitytypeid] = []
                if entity.entitytypeid in settings.ENTITY_TYPE_FOR_MAP_DISPLAY:
                    search_result[entity.entitytypeid].append(
                        JSONDeserializer().deserialize(
                            fromstr(entity.value).json))
                else:
                    search_result[entity.entitytypeid].append(entity.value)

        self.traverse(inspect_node)
        for entitytype, value in search_result.iteritems():
            if entitytype in settings.ADV_SEARCHABLE_ENTITY_TYPES or entitytype in settings.SEARCHABLE_ENTITY_TYPES:
                if entitytype in settings.ENTITY_TYPE_FOR_MAP_DISPLAY:
                    se.create_mapping('entity', self.entitytypeid, entitytype, 'geo_shape')
                else:
                    try:
                        uuid.UUID(value[0])
                        # SET FIELDS WITH UUIDS TO BE "NOT ANALYZED" IN ELASTIC SEARCH
                        se.create_mapping('entity', self.entitytypeid, entitytype, 'string', 'not_analyzed')
                    except (ValueError):
                        pass
                    # De-duplicate the collected string values.
                    search_result[entitytype] = list(
                        set(search_result[entitytype]))
        data = JSONSerializer().serializeToPython(search_result, ensure_ascii=True, indent=4)
        se.index_data('entity', self.entitytypeid, data, idfield=None, id=self.entityid)
        se.create_mapping('term', 'value', 'entityids', 'string', 'not_analyzed')
        se.index_terms(term_entities)
        return search_result
def index_concepts_for_search(self):
    """Index the entire concept tree into the 'concept' search index.

    Walks the hierarchy down from the root "Concepts" value, assigning
    each concept nested-set bounds (left/right) so that all descendants of
    a concept can later be fetched with a single range query on ``left``.
    """
    # Nested-set bookkeeping (adjacency list -> nested sets); see
    # http://sqlblog.com/blogs/adam_machanic/archive/2006/07/12/swinging-from-tree-to-tree-using-ctes-part-1-adjacency-to-nested-sets.aspx
    # Value of Lft for the root node is 1
    # Value of Rgt for the root node is 2 * (Number of nodes)
    # Value of Lft for any node is ((Number of nodes visited) * 2) - (Level of current node)
    # Value of Rgt for any node is (Lft value) + ((Number of subnodes) * 2) + 1
    # Deep concept trees can blow the default recursion limit.
    sys.setrecursionlimit(3000)
    se = SearchEngineFactory().create()
    # Ids must stay un-tokenized or exact-match lookups against them fail.
    se.create_mapping('concept', 'all', 'conceptid', 'string', 'not_analyzed')
    se.create_mapping('concept', 'all', 'labelid', 'string', 'not_analyzed')

    def _findNarrowerConcept(conceptid, ret=None, limit=200000, level=1):
        # Recursively visit narrower (child) concepts, accumulating every
        # concept into the shared ``ret`` dict keyed by conceptid, with its
        # labels and left/right nested-set values.
        returnobj = {'subnodes': 0}
        if ret == None:
            # the root node
            labels = archesmodels.Values.objects.filter(
                conceptid=conceptid)
            ret = {}
            nodesvisited = len(ret) + 1
            ret[conceptid] = {
                'labels': [],
                'left': (nodesvisited * 2) - level,
                'right': 0
            }
            for label in labels:
                ret[conceptid]['labels'].append({
                    'labelid': label.pk,
                    'label': label.value
                })
            level = level + 1
        conceptrealations = archesmodels.ConceptRelations.objects.filter(
            conceptidfrom=conceptid)
        for relation in conceptrealations:
            nodesvisited = len(ret) + 1
            labels = archesmodels.Values.objects.filter(
                conceptid=relation.conceptidto)
            ret[relation.conceptidto_id] = {
                'labels': [],
                'left': (nodesvisited * 2) - level,
                'right': 0
            }
            for label in labels:
                ret[relation.conceptidto_id]['labels'].append({
                    'labelid': label.pk,
                    'label': label.value
                })
            returnobj = _findNarrowerConcept(relation.conceptidto_id, ret=ret, level=level + 1)
        subnodes = returnobj['subnodes']
        if subnodes == 0:
            # meaning we're at a leaf node
            ret[conceptid]['right'] = ret[conceptid]['left'] + 1
        else:
            ret[conceptid]['right'] = subnodes + 1
        return {'all_concepts': ret, 'subnodes': ret[conceptid]['right']}

    label = archesmodels.Values.objects.get(value="Concepts")
    concepts = _findNarrowerConcept(label.conceptid_id)
    all_concepts = []
    # Flatten the accumulated dict into one document per concept.
    for key, concept in concepts['all_concepts'].iteritems():
        all_concepts.append({
            'conceptid': key,
            'labels': concept['labels'],
            'left': concept['left'],
            'right': concept['right']
        })
    self.index(all_concepts, 'concept', 'all', 'conceptid')
def index(self):
    """
    Gets a SearchResult object for a given asset entity
    Used for populating the search index with searchable entity information
    """
    # Only the root (rank 0) entity of a graph is indexed.
    if self.get_rank() == 0:
        se = SearchEngineFactory().create()
        search_result = {}
        search_result['entityid'] = self.entityid
        search_result['entitytypeid'] = self.entitytypeid
        search_result['strings'] = []
        search_result['geometries'] = []
        search_result['concepts'] = []
        term_entities = []
        names = []
        for name in self.get_primary_display_name():
            names.append(name.value)
        primary_display_name = ' '.join(names)
        search_result['primaryname'] = primary_display_name
        # Collect values of all searchable child entities; the entities
        # themselves are kept for term indexing at the end.
        for enititytype in settings.SEARCHABLE_ENTITY_TYPES:
            for entity in self.find_entities_by_type_id(enititytype):
                search_result['strings'].append(entity.value)
                term_entities.append(entity)
        # Each geometry becomes a 'maplayers' document keyed by the
        # geometry entity's id, indexed under the root entity's type.
        for geom_entity in self.find_entities_by_type_id(settings.ENTITY_TYPE_FOR_MAP_DISPLAY):
            search_result['geometries'].append(fromstr(geom_entity.value).json)
            mapfeature = MapFeature()
            mapfeature.geomentityid = geom_entity.entityid
            mapfeature.entityid = self.entityid
            mapfeature.entitytypeid = self.entitytypeid
            mapfeature.primaryname = primary_display_name
            mapfeature.geometry = geom_entity.value
            data = JSONSerializer().serializeToPython(mapfeature, ensure_ascii=True, indent=4)
            se.index_data('maplayers', self.entitytypeid, data, idfield='geomentityid')

        def to_int(s):
            # NOTE(review): helper appears unused within this method.
            try:
                return int(s)
            except ValueError:
                return ''

        def inspect_node(entity):
            # Traverse callback: bucket each searchable node's value under
            # its entity type id; geometries are stored as GeoJSON dicts.
            if entity.entitytypeid in settings.ADV_SEARCHABLE_ENTITY_TYPES or entity.entitytypeid in settings.SEARCHABLE_ENTITY_TYPES:
                if entity.entitytypeid not in search_result:
                    search_result[entity.entitytypeid] = []
                if entity.entitytypeid in settings.ENTITY_TYPE_FOR_MAP_DISPLAY:
                    search_result[entity.entitytypeid].append(JSONDeserializer().deserialize(fromstr(entity.value).json))
                else:
                    search_result[entity.entitytypeid].append(entity.value)

        self.traverse(inspect_node)
        for entitytype, value in search_result.iteritems():
            if entitytype in settings.ADV_SEARCHABLE_ENTITY_TYPES or entitytype in settings.SEARCHABLE_ENTITY_TYPES:
                if entitytype in settings.ENTITY_TYPE_FOR_MAP_DISPLAY:
                    se.create_mapping('entity', self.entitytypeid, entitytype, 'geo_shape')
                else:
                    try:
                        uuid.UUID(value[0])
                        # SET FIELDS WITH UUIDS TO BE "NOT ANALYZED" IN ELASTIC SEARCH
                        se.create_mapping('entity', self.entitytypeid, entitytype, 'string', 'not_analyzed')
                    except(ValueError):
                        pass
                    # De-duplicate the collected string values.
                    search_result[entitytype] = list(set(search_result[entitytype]))
        data = JSONSerializer().serializeToPython(search_result, ensure_ascii=True, indent=4)
        se.index_data('entity', self.entitytypeid, data, idfield=None, id=self.entityid)
        se.create_mapping('term', 'value', 'entityids', 'string', 'not_analyzed')
        se.index_terms(term_entities)
        return search_result